From 07bf2dff78dce04314447efe46ccb70480374daf Mon Sep 17 00:00:00 2001
From: Joseph Enguehard
Date: Mon, 20 May 2024 09:06:57 +0100
Subject: [PATCH 01/80] Add TokenClassification for Mistral, Mixtral and Qwen2 (#29878)

* Add MistralForTokenClassification

* Add tests and docs

* Add token classification for Mixtral and Qwen2

* Save llma for token classification draft

* Add token classification support for Llama, Gemma, Persimmon, StableLm and StarCoder2

* Formatting

* Add token classification support for Qwen2Moe model

* Add dropout layer to each ForTokenClassification model

* Add copied from in tests

* Update src/transformers/models/llama/modeling_llama.py

Co-authored-by: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>

* Propagate suggested changes

* Style

---------

Co-authored-by: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
---
 docs/source/en/model_doc/gemma.md | 5 +
 docs/source/en/model_doc/llama.md | 5 +
 docs/source/en/model_doc/mistral.md | 5 +
 docs/source/en/model_doc/mixtral.md | 5 +
 docs/source/en/model_doc/persimmon.md | 5 +
 docs/source/en/model_doc/qwen2.md | 5 +
 docs/source/en/model_doc/qwen2_moe.md | 5 +
 docs/source/en/model_doc/stablelm.md | 5 +
 docs/source/en/model_doc/starcoder2.md | 5 +
 src/transformers/__init__.py | 25 ++++-
 src/transformers/models/auto/modeling_auto.py | 9 ++
 src/transformers/models/gemma/__init__.py | 2 +
 .../models/gemma/modeling_gemma.py | 92 ++++++++++++++++++-
 src/transformers/models/llama/__init__.py | 2 +
 .../models/llama/modeling_llama.py | 85 +++++++++++++++++
 src/transformers/models/mistral/__init__.py | 2 +
 .../models/mistral/modeling_mistral.py | 92 ++++++++++++++++++-
 src/transformers/models/mixtral/__init__.py | 2 +
 .../models/mixtral/modeling_mixtral.py | 86 +++++++++++++++++
 src/transformers/models/persimmon/__init__.py | 2 +
 .../models/persimmon/modeling_persimmon.py | 92 ++++++++++++++++++-
 src/transformers/models/qwen2/__init__.py | 2 +
 .../models/qwen2/modeling_qwen2.py | 92 ++++++++++++++++++-
 src/transformers/models/qwen2_moe/__init__.py | 2 +
 .../models/qwen2_moe/modeling_qwen2_moe.py | 92 ++++++++++++++++++-
 src/transformers/models/stablelm/__init__.py | 2 +
 .../models/stablelm/modeling_stablelm.py | 92 ++++++++++++++++++-
 .../models/starcoder2/__init__.py | 2 +
 .../models/starcoder2/modeling_starcoder2.py | 92 ++++++++++++++++++-
 src/transformers/utils/dummy_pt_objects.py | 63 +++++++++++++
 tests/models/gemma/test_modeling_gemma.py | 31 ++++++-
 tests/models/llama/test_modeling_llama.py | 25 ++++-
 tests/models/mistral/test_modeling_mistral.py | 22 ++++-
 tests/models/mixtral/test_modeling_mixtral.py | 28 +++++-
 .../persimmon/test_modeling_persimmon.py | 22 ++++-
 tests/models/qwen2/test_modeling_qwen2.py | 24 ++++-
 .../qwen2_moe/test_modeling_qwen2_moe.py | 22 ++++-
 .../models/stablelm/test_modeling_stablelm.py | 22 ++++-
 .../starcoder2/test_modeling_starcoder2.py | 22 ++++-
 39 files changed, 1174 insertions(+), 19 deletions(-)

diff --git a/docs/source/en/model_doc/gemma.md b/docs/source/en/model_doc/gemma.md
index f55995b6d8..abd077af8d 100644
--- a/docs/source/en/model_doc/gemma.md
+++ b/docs/source/en/model_doc/gemma.md
@@ -60,6 +60,11 @@ This model was contributed by [Arthur Zucker](https://huggingface.co/ArthurZ), [
 [[autodoc]] GemmaForSequenceClassification
     - forward
 
+## GemmaForTokenClassification
+
+[[autodoc]] GemmaForTokenClassification
+    - forward
+
 ## FlaxGemmaModel
 
 [[autodoc]] FlaxGemmaModel
diff --git a/docs/source/en/model_doc/llama.md b/docs/source/en/model_doc/llama.md
index 915d5ecc70..2f0eb63da0 100644
--- a/docs/source/en/model_doc/llama.md
+++ b/docs/source/en/model_doc/llama.md
@@ -121,6 +121,11 @@ A list of official Hugging Face and community (indicated by 🌎) resources to h
 [[autodoc]] LlamaForQuestionAnswering
     - forward
 
+## LlamaForTokenClassification
+
+[[autodoc]] LlamaForTokenClassification
+    - forward
+
 ## FlaxLlamaModel
 
 [[autodoc]] FlaxLlamaModel
diff --git a/docs/source/en/model_doc/mistral.md b/docs/source/en/model_doc/mistral.md
index 0ab2142061..d4bc761060 100644
--- a/docs/source/en/model_doc/mistral.md
+++ b/docs/source/en/model_doc/mistral.md
@@ -203,6 +203,11 @@ A list of official Hugging Face and community (indicated by 🌎) resources to h
 [[autodoc]] MistralForSequenceClassification
     - forward
 
+## MistralForTokenClassification
+
+[[autodoc]] MistralForTokenClassification
+    - forward
+
 ## FlaxMistralModel
 
 [[autodoc]] FlaxMistralModel
diff --git a/docs/source/en/model_doc/mixtral.md b/docs/source/en/model_doc/mixtral.md
index 942b040c3f..b93acdec58 100644
--- a/docs/source/en/model_doc/mixtral.md
+++ b/docs/source/en/model_doc/mixtral.md
@@ -204,3 +204,8 @@ A list of official Hugging Face and community (indicated by 🌎) resources to h
 
 [[autodoc]] MixtralForSequenceClassification
     - forward
+
+## MixtralForTokenClassification
+
+[[autodoc]] MixtralForTokenClassification
+    - forward
diff --git a/docs/source/en/model_doc/persimmon.md b/docs/source/en/model_doc/persimmon.md
index fe9e66a0b7..7a105ac554 100644
--- a/docs/source/en/model_doc/persimmon.md
+++ b/docs/source/en/model_doc/persimmon.md
@@ -96,3 +96,8 @@ The `LlamaTokenizer` is used as it is a standard wrapper around sentencepiece. T
 
 [[autodoc]] PersimmonForSequenceClassification
     - forward
+
+## PersimmonForTokenClassification
+
+[[autodoc]] PersimmonForTokenClassification
+    - forward
diff --git a/docs/source/en/model_doc/qwen2.md b/docs/source/en/model_doc/qwen2.md
index 5f9e5dba22..ac0e25e02c 100644
--- a/docs/source/en/model_doc/qwen2.md
+++ b/docs/source/en/model_doc/qwen2.md
@@ -80,3 +80,8 @@ In the following, we demonstrate how to use `Qwen2-7B-Chat-beta` for the inferen
 
 [[autodoc]] Qwen2ForSequenceClassification
     - forward
+
+## Qwen2ForTokenClassification
+
+[[autodoc]] Qwen2ForTokenClassification
+    - forward
diff --git a/docs/source/en/model_doc/qwen2_moe.md b/docs/source/en/model_doc/qwen2_moe.md
index 8a546c4016..9c6dc80beb 100644
--- a/docs/source/en/model_doc/qwen2_moe.md
+++ b/docs/source/en/model_doc/qwen2_moe.md
@@ -75,3 +75,8 @@ In the following, we demonstrate how to use `Qwen1.5-MoE-A2.7B-Chat` for the inf
 
 [[autodoc]] Qwen2MoeForSequenceClassification
     - forward
+
+## Qwen2MoeForTokenClassification
+
+[[autodoc]] Qwen2MoeForTokenClassification
+    - forward
diff --git a/docs/source/en/model_doc/stablelm.md b/docs/source/en/model_doc/stablelm.md
index 6a50995ca0..09c0e5855c 100644
--- a/docs/source/en/model_doc/stablelm.md
+++ b/docs/source/en/model_doc/stablelm.md
@@ -104,3 +104,8 @@ Now, to run the model with Flash Attention 2, refer to the snippet below:
 
 [[autodoc]] StableLmForSequenceClassification
     - forward
+
+## StableLmForTokenClassification
+
+[[autodoc]] StableLmForTokenClassification
+    - forward
diff --git a/docs/source/en/model_doc/starcoder2.md b/docs/source/en/model_doc/starcoder2.md
index 9e2e547b8c..1d107b3855 100644
--- a/docs/source/en/model_doc/starcoder2.md
+++ b/docs/source/en/model_doc/starcoder2.md
@@ -66,3 +66,8 @@ These ready-to-use checkpoints can be downloaded and used via the HuggingFace Hu
 
 [[autodoc]] Starcoder2ForSequenceClassification
     - forward
+
+## Starcoder2ForTokenClassification
+
+[[autodoc]] Starcoder2ForTokenClassification
+    - forward
diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
index 18faabc807..4255e30379 100755
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -2031,6 +2031,7 @@ else:
         [
             "GemmaForCausalLM",
             "GemmaForSequenceClassification",
+            "GemmaForTokenClassification",
             "GemmaModel",
             "GemmaPreTrainedModel",
         ]
@@ -2288,6 +2289,7 @@ else:
             "LlamaForCausalLM",
             "LlamaForQuestionAnswering",
             "LlamaForSequenceClassification",
+            "LlamaForTokenClassification",
             "LlamaModel",
             "LlamaPreTrainedModel",
         ]
@@ -2435,12 +2437,19 @@ else:
         [
             "MistralForCausalLM",
             "MistralForSequenceClassification",
+            "MistralForTokenClassification",
             "MistralModel",
             "MistralPreTrainedModel",
         ]
     )
     _import_structure["models.mixtral"].extend(
-        ["MixtralForCausalLM", "MixtralForSequenceClassification", "MixtralModel", "MixtralPreTrainedModel"]
+        [
+            "MixtralForCausalLM",
+            "MixtralForSequenceClassification",
+            "MixtralForTokenClassification",
+            "MixtralModel",
+            "MixtralPreTrainedModel",
+        ]
     )
     _import_structure["models.mobilebert"].extend(
         [
@@ -2714,6 +2723,7 @@ else:
         [
             "PersimmonForCausalLM",
             "PersimmonForSequenceClassification",
+            "PersimmonForTokenClassification",
             "PersimmonModel",
             "PersimmonPreTrainedModel",
         ]
@@ -2810,6 +2820,7 @@ else:
         [
             "Qwen2ForCausalLM",
             "Qwen2ForSequenceClassification",
+            "Qwen2ForTokenClassification",
             "Qwen2Model",
             "Qwen2PreTrainedModel",
         ]
@@ -2818,6 +2829,7 @@ else:
         [
             "Qwen2MoeForCausalLM",
             "Qwen2MoeForSequenceClassification",
+            "Qwen2MoeForTokenClassification",
             "Qwen2MoeModel",
             "Qwen2MoePreTrainedModel",
         ]
@@ -3066,6 +3078,7 @@ else:
         [
             "StableLmForCausalLM",
             "StableLmForSequenceClassification",
+            "StableLmForTokenClassification",
             "StableLmModel",
             "StableLmPreTrainedModel",
         ]
@@ -3074,6 +3087,7 @@ else:
         [
             "Starcoder2ForCausalLM",
             "Starcoder2ForSequenceClassification",
+            "Starcoder2ForTokenClassification",
             "Starcoder2Model",
             "Starcoder2PreTrainedModel",
         ]
@@ -6489,6 +6503,7 @@ if TYPE_CHECKING:
         from .models.gemma import (
             GemmaForCausalLM,
             GemmaForSequenceClassification,
+            GemmaForTokenClassification,
             GemmaModel,
             GemmaPreTrainedModel,
         )
@@ -6686,6 +6701,7 @@ if TYPE_CHECKING:
             LlamaForCausalLM,
             LlamaForQuestionAnswering,
             LlamaForSequenceClassification,
+            LlamaForTokenClassification,
             LlamaModel,
             LlamaPreTrainedModel,
         )
@@ -6801,12 +6817,14 @@ if TYPE_CHECKING:
         from .models.mistral import (
             MistralForCausalLM,
             MistralForSequenceClassification,
+            MistralForTokenClassification,
             MistralModel,
             MistralPreTrainedModel,
         )
         from .models.mixtral import (
             MixtralForCausalLM,
             MixtralForSequenceClassification,
+            MixtralForTokenClassification,
             MixtralModel,
             MixtralPreTrainedModel,
         )
@@ -7025,6 +7043,7 @@ if TYPE_CHECKING:
         from .models.persimmon import (
             PersimmonForCausalLM,
             PersimmonForSequenceClassification,
+            PersimmonForTokenClassification,
             PersimmonModel,
             PersimmonPreTrainedModel,
         )
@@ -7099,12 +7118,14 @@ if TYPE_CHECKING:
         from .models.qwen2 import (
             Qwen2ForCausalLM,
             Qwen2ForSequenceClassification,
+            Qwen2ForTokenClassification,
             Qwen2Model,
             Qwen2PreTrainedModel,
         )
         from .models.qwen2_moe import (
             Qwen2MoeForCausalLM,
             Qwen2MoeForSequenceClassification,
+            Qwen2MoeForTokenClassification,
             Qwen2MoeModel,
             Qwen2MoePreTrainedModel,
         )
@@ -7306,12 +7327,14 @@ if TYPE_CHECKING:
         from .models.stablelm import (
             StableLmForCausalLM,
             StableLmForSequenceClassification,
+            StableLmForTokenClassification,
             StableLmModel,
             StableLmPreTrainedModel,
         )
         from .models.starcoder2 import (
             Starcoder2ForCausalLM,
Starcoder2ForSequenceClassification, + Starcoder2ForTokenClassification, Starcoder2Model, Starcoder2PreTrainedModel, ) diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index 7825a0217f..c16ab83bdc 100755 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -1038,6 +1038,7 @@ MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict( ("flaubert", "FlaubertForTokenClassification"), ("fnet", "FNetForTokenClassification"), ("funnel", "FunnelForTokenClassification"), + ("gemma", "GemmaForTokenClassification"), ("gpt-sw3", "GPT2ForTokenClassification"), ("gpt2", "GPT2ForTokenClassification"), ("gpt_bigcode", "GPTBigCodeForTokenClassification"), @@ -1048,11 +1049,14 @@ MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict( ("layoutlmv2", "LayoutLMv2ForTokenClassification"), ("layoutlmv3", "LayoutLMv3ForTokenClassification"), ("lilt", "LiltForTokenClassification"), + ("llama", "LlamaForTokenClassification"), ("longformer", "LongformerForTokenClassification"), ("luke", "LukeForTokenClassification"), ("markuplm", "MarkupLMForTokenClassification"), ("mega", "MegaForTokenClassification"), ("megatron-bert", "MegatronBertForTokenClassification"), + ("mistral", "MistralForTokenClassification"), + ("mixtral", "MixtralForTokenClassification"), ("mobilebert", "MobileBertForTokenClassification"), ("mpnet", "MPNetForTokenClassification"), ("mpt", "MptForTokenClassification"), @@ -1060,15 +1064,20 @@ MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict( ("mt5", "MT5ForTokenClassification"), ("nezha", "NezhaForTokenClassification"), ("nystromformer", "NystromformerForTokenClassification"), + ("persimmon", "PersimmonForTokenClassification"), ("phi", "PhiForTokenClassification"), ("phi3", "Phi3ForTokenClassification"), ("qdqbert", "QDQBertForTokenClassification"), + ("qwen2", "Qwen2ForTokenClassification"), + ("qwen2_moe", "Qwen2MoeForTokenClassification"), ("rembert", "RemBertForTokenClassification"), ("roberta", "RobertaForTokenClassification"), ("roberta-prelayernorm", "RobertaPreLayerNormForTokenClassification"), ("roc_bert", "RoCBertForTokenClassification"), ("roformer", "RoFormerForTokenClassification"), ("squeezebert", "SqueezeBertForTokenClassification"), + ("stablelm", "StableLmForTokenClassification"), + ("starcoder2", "Starcoder2ForTokenClassification"), ("t5", "T5ForTokenClassification"), ("umt5", "UMT5ForTokenClassification"), ("xlm", "XLMForTokenClassification"), diff --git a/src/transformers/models/gemma/__init__.py b/src/transformers/models/gemma/__init__.py index 1c832e9051..1aafae6e88 100644 --- a/src/transformers/models/gemma/__init__.py +++ b/src/transformers/models/gemma/__init__.py @@ -55,6 +55,7 @@ else: "GemmaModel", "GemmaPreTrainedModel", "GemmaForSequenceClassification", + "GemmaForTokenClassification", ] try: @@ -98,6 +99,7 @@ if TYPE_CHECKING: from .modeling_gemma import ( GemmaForCausalLM, GemmaForSequenceClassification, + GemmaForTokenClassification, GemmaModel, GemmaPreTrainedModel, ) diff --git a/src/transformers/models/gemma/modeling_gemma.py b/src/transformers/models/gemma/modeling_gemma.py index a3fc4b171d..ec88074ad5 100644 --- a/src/transformers/models/gemma/modeling_gemma.py +++ b/src/transformers/models/gemma/modeling_gemma.py @@ -30,7 +30,12 @@ from ...modeling_attn_mask_utils import ( AttentionMaskConverter, _prepare_4d_causal_attention_mask, ) -from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, 
SequenceClassifierOutputWithPast +from ...modeling_outputs import ( + BaseModelOutputWithPast, + CausalLMOutputWithPast, + SequenceClassifierOutputWithPast, + TokenClassifierOutput, +) from ...modeling_utils import PreTrainedModel from ...pytorch_utils import ALL_LAYERNORM_LAYERS, is_torch_greater_or_equal_than_1_13 from ...utils import ( @@ -1346,3 +1351,88 @@ class GemmaForSequenceClassification(GemmaPreTrainedModel): hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) + + +@add_start_docstrings( + """ + The Gemma Model transformer with a token classification head on top (a linear layer on top of the hidden-states + output) e.g. for Named-Entity-Recognition (NER) tasks. + """, + GEMMA_START_DOCSTRING, +) +# Copied from transformers.models.llama.modeling_llama.LlamaForTokenClassification with Llama->Gemma, LLAMA->GEMMA +class GemmaForTokenClassification(GemmaPreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.model = GemmaModel(config) + if getattr(config, "classifier_dropout", None) is not None: + classifier_dropout = config.classifier_dropout + elif getattr(config, "hidden_dropout", None) is not None: + classifier_dropout = config.hidden_dropout + else: + classifier_dropout = 0.1 + self.dropout = nn.Dropout(classifier_dropout) + self.score = nn.Linear(config.hidden_size, config.num_labels) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + @add_start_docstrings_to_model_forward(GEMMA_INPUTS_DOCSTRING) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, SequenceClassifierOutputWithPast]: + r""" + labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., + config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If + `config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
+ """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.model( + input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + sequence_output = outputs[0] + sequence_output = self.dropout(sequence_output) + logits = self.score(sequence_output) + + loss = None + if labels is not None: + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + + if not return_dict: + output = (logits,) + outputs[2:] + return ((loss,) + output) if loss is not None else output + + return TokenClassifierOutput( + loss=loss, + logits=logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) diff --git a/src/transformers/models/llama/__init__.py b/src/transformers/models/llama/__init__.py index 4b8a33118c..3f6461c4c0 100644 --- a/src/transformers/models/llama/__init__.py +++ b/src/transformers/models/llama/__init__.py @@ -55,6 +55,7 @@ else: "LlamaPreTrainedModel", "LlamaForSequenceClassification", "LlamaForQuestionAnswering", + "LlamaForTokenClassification", ] try: @@ -95,6 +96,7 @@ if TYPE_CHECKING: LlamaForCausalLM, LlamaForQuestionAnswering, LlamaForSequenceClassification, + LlamaForTokenClassification, LlamaModel, LlamaPreTrainedModel, ) diff --git a/src/transformers/models/llama/modeling_llama.py b/src/transformers/models/llama/modeling_llama.py index ba02a7fc77..56a83c178c 100644 --- a/src/transformers/models/llama/modeling_llama.py +++ b/src/transformers/models/llama/modeling_llama.py @@ -36,6 +36,7 @@ from ...modeling_outputs import ( CausalLMOutputWithPast, QuestionAnsweringModelOutput, SequenceClassifierOutputWithPast, + TokenClassifierOutput, ) from ...modeling_utils import PreTrainedModel from ...pytorch_utils import ALL_LAYERNORM_LAYERS @@ -1516,3 +1517,87 @@ class LlamaForQuestionAnswering(LlamaPreTrainedModel): hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) + + +@add_start_docstrings( + """ + The Llama Model transformer with a token classification head on top (a linear layer on top of the hidden-states + output) e.g. for Named-Entity-Recognition (NER) tasks. 
+ """, + LLAMA_START_DOCSTRING, +) +class LlamaForTokenClassification(LlamaPreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.model = LlamaModel(config) + if getattr(config, "classifier_dropout", None) is not None: + classifier_dropout = config.classifier_dropout + elif getattr(config, "hidden_dropout", None) is not None: + classifier_dropout = config.hidden_dropout + else: + classifier_dropout = 0.1 + self.dropout = nn.Dropout(classifier_dropout) + self.score = nn.Linear(config.hidden_size, config.num_labels) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, SequenceClassifierOutputWithPast]: + r""" + labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., + config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If + `config.num_labels > 1` a classification loss is computed (Cross-Entropy). + """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.model( + input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + sequence_output = outputs[0] + sequence_output = self.dropout(sequence_output) + logits = self.score(sequence_output) + + loss = None + if labels is not None: + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + + if not return_dict: + output = (logits,) + outputs[2:] + return ((loss,) + output) if loss is not None else output + + return TokenClassifierOutput( + loss=loss, + logits=logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) diff --git a/src/transformers/models/mistral/__init__.py b/src/transformers/models/mistral/__init__.py index dc0b85980f..abf1e32a4b 100644 --- a/src/transformers/models/mistral/__init__.py +++ b/src/transformers/models/mistral/__init__.py @@ -32,6 +32,7 @@ else: "MistralModel", "MistralPreTrainedModel", "MistralForSequenceClassification", + "MistralForTokenClassification", ] try: @@ -59,6 +60,7 @@ if TYPE_CHECKING: from .modeling_mistral import ( MistralForCausalLM, MistralForSequenceClassification, + MistralForTokenClassification, MistralModel, MistralPreTrainedModel, ) diff --git a/src/transformers/models/mistral/modeling_mistral.py b/src/transformers/models/mistral/modeling_mistral.py index ccfa034ae6..cc5d8f0862 100644 --- a/src/transformers/models/mistral/modeling_mistral.py +++ b/src/transformers/models/mistral/modeling_mistral.py @@ -31,7 +31,12 
@@ from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss from ...activations import ACT2FN from ...cache_utils import Cache, DynamicCache from ...modeling_attn_mask_utils import _prepare_4d_causal_attention_mask, _prepare_4d_causal_attention_mask_for_sdpa -from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast +from ...modeling_outputs import ( + BaseModelOutputWithPast, + CausalLMOutputWithPast, + SequenceClassifierOutputWithPast, + TokenClassifierOutput, +) from ...modeling_utils import PreTrainedModel from ...utils import ( add_start_docstrings, @@ -1366,3 +1371,88 @@ class MistralForSequenceClassification(MistralPreTrainedModel): hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) + + +@add_start_docstrings( + """ + The Mistral Model transformer with a token classification head on top (a linear layer on top of the hidden-states + output) e.g. for Named-Entity-Recognition (NER) tasks. + """, + MISTRAL_START_DOCSTRING, +) +# Copied from transformers.models.llama.modeling_llama.LlamaForTokenClassification with Llama->Mistral, LLAMA->MISTRAL +class MistralForTokenClassification(MistralPreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.model = MistralModel(config) + if getattr(config, "classifier_dropout", None) is not None: + classifier_dropout = config.classifier_dropout + elif getattr(config, "hidden_dropout", None) is not None: + classifier_dropout = config.hidden_dropout + else: + classifier_dropout = 0.1 + self.dropout = nn.Dropout(classifier_dropout) + self.score = nn.Linear(config.hidden_size, config.num_labels) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + @add_start_docstrings_to_model_forward(MISTRAL_INPUTS_DOCSTRING) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, SequenceClassifierOutputWithPast]: + r""" + labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., + config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If + `config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
+ """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.model( + input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + sequence_output = outputs[0] + sequence_output = self.dropout(sequence_output) + logits = self.score(sequence_output) + + loss = None + if labels is not None: + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + + if not return_dict: + output = (logits,) + outputs[2:] + return ((loss,) + output) if loss is not None else output + + return TokenClassifierOutput( + loss=loss, + logits=logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) diff --git a/src/transformers/models/mixtral/__init__.py b/src/transformers/models/mixtral/__init__.py index 7b8f061dac..b124d41dfb 100644 --- a/src/transformers/models/mixtral/__init__.py +++ b/src/transformers/models/mixtral/__init__.py @@ -36,6 +36,7 @@ else: "MixtralModel", "MixtralPreTrainedModel", "MixtralForSequenceClassification", + "MixtralForTokenClassification", ] @@ -51,6 +52,7 @@ if TYPE_CHECKING: from .modeling_mixtral import ( MixtralForCausalLM, MixtralForSequenceClassification, + MixtralForTokenClassification, MixtralModel, MixtralPreTrainedModel, ) diff --git a/src/transformers/models/mixtral/modeling_mixtral.py b/src/transformers/models/mixtral/modeling_mixtral.py index 19e4f15718..7bafe49905 100644 --- a/src/transformers/models/mixtral/modeling_mixtral.py +++ b/src/transformers/models/mixtral/modeling_mixtral.py @@ -38,6 +38,7 @@ from ...modeling_outputs import ( MoeCausalLMOutputWithPast, MoeModelOutputWithPast, SequenceClassifierOutputWithPast, + TokenClassifierOutput, ) from ...modeling_utils import PreTrainedModel from ...pytorch_utils import is_torch_greater_or_equal_than_1_13 @@ -1582,3 +1583,88 @@ class MixtralForSequenceClassification(MixtralPreTrainedModel): hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) + + +@add_start_docstrings( + """ + The Mixtral Model transformer with a token classification head on top (a linear layer on top of the hidden-states + output) e.g. for Named-Entity-Recognition (NER) tasks. 
+ """, + MIXTRAL_START_DOCSTRING, +) +# Copied from transformers.models.llama.modeling_llama.LlamaForTokenClassification with Llama->Mixtral, LLAMA->MIXTRAL +class MixtralForTokenClassification(MixtralPreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.model = MixtralModel(config) + if getattr(config, "classifier_dropout", None) is not None: + classifier_dropout = config.classifier_dropout + elif getattr(config, "hidden_dropout", None) is not None: + classifier_dropout = config.hidden_dropout + else: + classifier_dropout = 0.1 + self.dropout = nn.Dropout(classifier_dropout) + self.score = nn.Linear(config.hidden_size, config.num_labels) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + @add_start_docstrings_to_model_forward(MIXTRAL_INPUTS_DOCSTRING) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, SequenceClassifierOutputWithPast]: + r""" + labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., + config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If + `config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
+ """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.model( + input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + sequence_output = outputs[0] + sequence_output = self.dropout(sequence_output) + logits = self.score(sequence_output) + + loss = None + if labels is not None: + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + + if not return_dict: + output = (logits,) + outputs[2:] + return ((loss,) + output) if loss is not None else output + + return TokenClassifierOutput( + loss=loss, + logits=logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) diff --git a/src/transformers/models/persimmon/__init__.py b/src/transformers/models/persimmon/__init__.py index 75bc218a29..e1f24ca1b7 100644 --- a/src/transformers/models/persimmon/__init__.py +++ b/src/transformers/models/persimmon/__init__.py @@ -36,6 +36,7 @@ else: "PersimmonModel", "PersimmonPreTrainedModel", "PersimmonForSequenceClassification", + "PersimmonForTokenClassification", ] @@ -51,6 +52,7 @@ if TYPE_CHECKING: from .modeling_persimmon import ( PersimmonForCausalLM, PersimmonForSequenceClassification, + PersimmonForTokenClassification, PersimmonModel, PersimmonPreTrainedModel, ) diff --git a/src/transformers/models/persimmon/modeling_persimmon.py b/src/transformers/models/persimmon/modeling_persimmon.py index 8d4ad53207..75ba7163ba 100644 --- a/src/transformers/models/persimmon/modeling_persimmon.py +++ b/src/transformers/models/persimmon/modeling_persimmon.py @@ -29,7 +29,12 @@ from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss from ...activations import ACT2FN from ...cache_utils import Cache, DynamicCache from ...modeling_attn_mask_utils import _prepare_4d_causal_attention_mask -from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast +from ...modeling_outputs import ( + BaseModelOutputWithPast, + CausalLMOutputWithPast, + SequenceClassifierOutputWithPast, + TokenClassifierOutput, +) from ...modeling_utils import PreTrainedModel from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings from .configuration_persimmon import PersimmonConfig @@ -1011,3 +1016,88 @@ class PersimmonForSequenceClassification(PersimmonPreTrainedModel): hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) + + +@add_start_docstrings( + """ + The Persimmon Model transformer with a token classification head on top (a linear layer on top of the hidden-states + output) e.g. for Named-Entity-Recognition (NER) tasks. 
+ """, + PERSIMMON_START_DOCSTRING, +) +# Copied from transformers.models.llama.modeling_llama.LlamaForTokenClassification with Llama->Persimmon, LLAMA->PERSIMMON +class PersimmonForTokenClassification(PersimmonPreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.model = PersimmonModel(config) + if getattr(config, "classifier_dropout", None) is not None: + classifier_dropout = config.classifier_dropout + elif getattr(config, "hidden_dropout", None) is not None: + classifier_dropout = config.hidden_dropout + else: + classifier_dropout = 0.1 + self.dropout = nn.Dropout(classifier_dropout) + self.score = nn.Linear(config.hidden_size, config.num_labels) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + @add_start_docstrings_to_model_forward(PERSIMMON_INPUTS_DOCSTRING) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, SequenceClassifierOutputWithPast]: + r""" + labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., + config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If + `config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
+ """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.model( + input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + sequence_output = outputs[0] + sequence_output = self.dropout(sequence_output) + logits = self.score(sequence_output) + + loss = None + if labels is not None: + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + + if not return_dict: + output = (logits,) + outputs[2:] + return ((loss,) + output) if loss is not None else output + + return TokenClassifierOutput( + loss=loss, + logits=logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) diff --git a/src/transformers/models/qwen2/__init__.py b/src/transformers/models/qwen2/__init__.py index 3409f28214..35df37e91a 100644 --- a/src/transformers/models/qwen2/__init__.py +++ b/src/transformers/models/qwen2/__init__.py @@ -45,6 +45,7 @@ else: "Qwen2Model", "Qwen2PreTrainedModel", "Qwen2ForSequenceClassification", + "Qwen2ForTokenClassification", ] @@ -69,6 +70,7 @@ if TYPE_CHECKING: from .modeling_qwen2 import ( Qwen2ForCausalLM, Qwen2ForSequenceClassification, + Qwen2ForTokenClassification, Qwen2Model, Qwen2PreTrainedModel, ) diff --git a/src/transformers/models/qwen2/modeling_qwen2.py b/src/transformers/models/qwen2/modeling_qwen2.py index 76b975ce9b..c9b8bd8fde 100644 --- a/src/transformers/models/qwen2/modeling_qwen2.py +++ b/src/transformers/models/qwen2/modeling_qwen2.py @@ -31,7 +31,12 @@ from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss from ...activations import ACT2FN from ...cache_utils import Cache, DynamicCache from ...modeling_attn_mask_utils import _prepare_4d_causal_attention_mask, _prepare_4d_causal_attention_mask_for_sdpa -from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast +from ...modeling_outputs import ( + BaseModelOutputWithPast, + CausalLMOutputWithPast, + SequenceClassifierOutputWithPast, + TokenClassifierOutput, +) from ...modeling_utils import PreTrainedModel from ...utils import ( add_start_docstrings, @@ -1375,3 +1380,88 @@ class Qwen2ForSequenceClassification(Qwen2PreTrainedModel): hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) + + +@add_start_docstrings( + """ + The Qwen2 Model transformer with a token classification head on top (a linear layer on top of the hidden-states + output) e.g. for Named-Entity-Recognition (NER) tasks. 
+ """, + QWEN2_START_DOCSTRING, +) +# Copied from transformers.models.llama.modeling_llama.LlamaForTokenClassification with Llama->Qwen2, LLAMA->QWEN2 +class Qwen2ForTokenClassification(Qwen2PreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.model = Qwen2Model(config) + if getattr(config, "classifier_dropout", None) is not None: + classifier_dropout = config.classifier_dropout + elif getattr(config, "hidden_dropout", None) is not None: + classifier_dropout = config.hidden_dropout + else: + classifier_dropout = 0.1 + self.dropout = nn.Dropout(classifier_dropout) + self.score = nn.Linear(config.hidden_size, config.num_labels) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + @add_start_docstrings_to_model_forward(QWEN2_INPUTS_DOCSTRING) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, SequenceClassifierOutputWithPast]: + r""" + labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., + config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If + `config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
+ """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.model( + input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + sequence_output = outputs[0] + sequence_output = self.dropout(sequence_output) + logits = self.score(sequence_output) + + loss = None + if labels is not None: + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + + if not return_dict: + output = (logits,) + outputs[2:] + return ((loss,) + output) if loss is not None else output + + return TokenClassifierOutput( + loss=loss, + logits=logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) diff --git a/src/transformers/models/qwen2_moe/__init__.py b/src/transformers/models/qwen2_moe/__init__.py index fb12383278..e2b73ba2d1 100644 --- a/src/transformers/models/qwen2_moe/__init__.py +++ b/src/transformers/models/qwen2_moe/__init__.py @@ -36,6 +36,7 @@ else: "Qwen2MoeModel", "Qwen2MoePreTrainedModel", "Qwen2MoeForSequenceClassification", + "Qwen2MoeForTokenClassification", ] @@ -51,6 +52,7 @@ if TYPE_CHECKING: from .modeling_qwen2_moe import ( Qwen2MoeForCausalLM, Qwen2MoeForSequenceClassification, + Qwen2MoeForTokenClassification, Qwen2MoeModel, Qwen2MoePreTrainedModel, ) diff --git a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py index 914a2dbdf4..f8d1bf6bb4 100644 --- a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py +++ b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py @@ -32,7 +32,12 @@ from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss from ...activations import ACT2FN from ...cache_utils import Cache, DynamicCache from ...modeling_attn_mask_utils import _prepare_4d_causal_attention_mask, _prepare_4d_causal_attention_mask_for_sdpa -from ...modeling_outputs import MoeCausalLMOutputWithPast, MoeModelOutputWithPast, SequenceClassifierOutputWithPast +from ...modeling_outputs import ( + MoeCausalLMOutputWithPast, + MoeModelOutputWithPast, + SequenceClassifierOutputWithPast, + TokenClassifierOutput, +) from ...modeling_utils import PreTrainedModel from ...utils import ( add_start_docstrings, @@ -1571,3 +1576,88 @@ class Qwen2MoeForSequenceClassification(Qwen2MoePreTrainedModel): hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) + + +@add_start_docstrings( + """ + The Qwen2MoE Model transformer with a token classification head on top (a linear layer on top of the hidden-states + output) e.g. for Named-Entity-Recognition (NER) tasks. 
+ """, + QWEN2MOE_START_DOCSTRING, +) +# Copied from transformers.models.llama.modeling_llama.LlamaForTokenClassification with Llama->Qwen2Moe, LLAMA->QWEN2MOE +class Qwen2MoeForTokenClassification(Qwen2MoePreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.model = Qwen2MoeModel(config) + if getattr(config, "classifier_dropout", None) is not None: + classifier_dropout = config.classifier_dropout + elif getattr(config, "hidden_dropout", None) is not None: + classifier_dropout = config.hidden_dropout + else: + classifier_dropout = 0.1 + self.dropout = nn.Dropout(classifier_dropout) + self.score = nn.Linear(config.hidden_size, config.num_labels) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + @add_start_docstrings_to_model_forward(QWEN2MOE_INPUTS_DOCSTRING) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, SequenceClassifierOutputWithPast]: + r""" + labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., + config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If + `config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
+ """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.model( + input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + sequence_output = outputs[0] + sequence_output = self.dropout(sequence_output) + logits = self.score(sequence_output) + + loss = None + if labels is not None: + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + + if not return_dict: + output = (logits,) + outputs[2:] + return ((loss,) + output) if loss is not None else output + + return TokenClassifierOutput( + loss=loss, + logits=logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) diff --git a/src/transformers/models/stablelm/__init__.py b/src/transformers/models/stablelm/__init__.py index 7fc3a6857f..c00c045f7f 100644 --- a/src/transformers/models/stablelm/__init__.py +++ b/src/transformers/models/stablelm/__init__.py @@ -36,6 +36,7 @@ else: "StableLmModel", "StableLmPreTrainedModel", "StableLmForSequenceClassification", + "StableLmForTokenClassification", ] @@ -51,6 +52,7 @@ if TYPE_CHECKING: from .modeling_stablelm import ( StableLmForCausalLM, StableLmForSequenceClassification, + StableLmForTokenClassification, StableLmModel, StableLmPreTrainedModel, ) diff --git a/src/transformers/models/stablelm/modeling_stablelm.py b/src/transformers/models/stablelm/modeling_stablelm.py index bc133ffb3d..91a6e83a8b 100755 --- a/src/transformers/models/stablelm/modeling_stablelm.py +++ b/src/transformers/models/stablelm/modeling_stablelm.py @@ -30,7 +30,12 @@ from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss from ...activations import ACT2FN from ...cache_utils import Cache, DynamicCache from ...modeling_attn_mask_utils import _prepare_4d_causal_attention_mask, _prepare_4d_causal_attention_mask_for_sdpa -from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast +from ...modeling_outputs import ( + BaseModelOutputWithPast, + CausalLMOutputWithPast, + SequenceClassifierOutputWithPast, + TokenClassifierOutput, +) from ...modeling_utils import PreTrainedModel from ...utils import ( add_start_docstrings, @@ -1383,3 +1388,88 @@ class StableLmForSequenceClassification(StableLmPreTrainedModel): hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) + + +@add_start_docstrings( + """ + The StableLm Model transformer with a token classification head on top (a linear layer on top of the hidden-states + output) e.g. for Named-Entity-Recognition (NER) tasks. 
+ """, + STABLELM_START_DOCSTRING, +) +# Copied from transformers.models.llama.modeling_llama.LlamaForTokenClassification with Llama->StableLm, LLAMA->STABLELM +class StableLmForTokenClassification(StableLmPreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.model = StableLmModel(config) + if getattr(config, "classifier_dropout", None) is not None: + classifier_dropout = config.classifier_dropout + elif getattr(config, "hidden_dropout", None) is not None: + classifier_dropout = config.hidden_dropout + else: + classifier_dropout = 0.1 + self.dropout = nn.Dropout(classifier_dropout) + self.score = nn.Linear(config.hidden_size, config.num_labels) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + @add_start_docstrings_to_model_forward(STABLELM_INPUTS_DOCSTRING) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, SequenceClassifierOutputWithPast]: + r""" + labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., + config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If + `config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
+ """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.model( + input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + sequence_output = outputs[0] + sequence_output = self.dropout(sequence_output) + logits = self.score(sequence_output) + + loss = None + if labels is not None: + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + + if not return_dict: + output = (logits,) + outputs[2:] + return ((loss,) + output) if loss is not None else output + + return TokenClassifierOutput( + loss=loss, + logits=logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) diff --git a/src/transformers/models/starcoder2/__init__.py b/src/transformers/models/starcoder2/__init__.py index 1eb195fde1..d9dc2cd1e5 100644 --- a/src/transformers/models/starcoder2/__init__.py +++ b/src/transformers/models/starcoder2/__init__.py @@ -36,6 +36,7 @@ else: "Starcoder2Model", "Starcoder2PreTrainedModel", "Starcoder2ForSequenceClassification", + "Starcoder2ForTokenClassification", ] @@ -51,6 +52,7 @@ if TYPE_CHECKING: from .modeling_starcoder2 import ( Starcoder2ForCausalLM, Starcoder2ForSequenceClassification, + Starcoder2ForTokenClassification, Starcoder2Model, Starcoder2PreTrainedModel, ) diff --git a/src/transformers/models/starcoder2/modeling_starcoder2.py b/src/transformers/models/starcoder2/modeling_starcoder2.py index 24d285e0b3..ae64b23aa6 100644 --- a/src/transformers/models/starcoder2/modeling_starcoder2.py +++ b/src/transformers/models/starcoder2/modeling_starcoder2.py @@ -31,7 +31,12 @@ from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss from ...activations import ACT2FN from ...cache_utils import Cache, DynamicCache from ...modeling_attn_mask_utils import _prepare_4d_causal_attention_mask, _prepare_4d_causal_attention_mask_for_sdpa -from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast +from ...modeling_outputs import ( + BaseModelOutputWithPast, + CausalLMOutputWithPast, + SequenceClassifierOutputWithPast, + TokenClassifierOutput, +) from ...modeling_utils import PreTrainedModel from ...utils import ( add_start_docstrings, @@ -1357,3 +1362,88 @@ class Starcoder2ForSequenceClassification(Starcoder2PreTrainedModel): hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) + + +@add_start_docstrings( + """ + The Starcoder2 Model transformer with a token classification head on top (a linear layer on top of the hidden-states + output) e.g. for Named-Entity-Recognition (NER) tasks. 
+ """, + STARCODER2_START_DOCSTRING, +) +# Copied from transformers.models.llama.modeling_llama.LlamaForTokenClassification with Llama->Starcoder2, LLAMA->STARCODER2 +class Starcoder2ForTokenClassification(Starcoder2PreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.model = Starcoder2Model(config) + if getattr(config, "classifier_dropout", None) is not None: + classifier_dropout = config.classifier_dropout + elif getattr(config, "hidden_dropout", None) is not None: + classifier_dropout = config.hidden_dropout + else: + classifier_dropout = 0.1 + self.dropout = nn.Dropout(classifier_dropout) + self.score = nn.Linear(config.hidden_size, config.num_labels) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + @add_start_docstrings_to_model_forward(STARCODER2_INPUTS_DOCSTRING) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, SequenceClassifierOutputWithPast]: + r""" + labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., + config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If + `config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
+ """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.model( + input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + sequence_output = outputs[0] + sequence_output = self.dropout(sequence_output) + logits = self.score(sequence_output) + + loss = None + if labels is not None: + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + + if not return_dict: + output = (logits,) + outputs[2:] + return ((loss,) + output) if loss is not None else output + + return TokenClassifierOutput( + loss=loss, + logits=logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index 681f858556..5e00230aed 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -3692,6 +3692,13 @@ class GemmaForSequenceClassification(metaclass=DummyObject): requires_backends(self, ["torch"]) +class GemmaForTokenClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class GemmaModel(metaclass=DummyObject): _backends = ["torch"] @@ -4642,6 +4649,13 @@ class LlamaForSequenceClassification(metaclass=DummyObject): requires_backends(self, ["torch"]) +class LlamaForTokenClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class LlamaModel(metaclass=DummyObject): _backends = ["torch"] @@ -5237,6 +5251,13 @@ class MistralForSequenceClassification(metaclass=DummyObject): requires_backends(self, ["torch"]) +class MistralForTokenClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class MistralModel(metaclass=DummyObject): _backends = ["torch"] @@ -5265,6 +5286,13 @@ class MixtralForSequenceClassification(metaclass=DummyObject): requires_backends(self, ["torch"]) +class MixtralForTokenClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class MixtralModel(metaclass=DummyObject): _backends = ["torch"] @@ -6373,6 +6401,13 @@ class PersimmonForSequenceClassification(metaclass=DummyObject): requires_backends(self, ["torch"]) +class PersimmonForTokenClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class PersimmonModel(metaclass=DummyObject): _backends = ["torch"] @@ -6734,6 +6769,13 @@ class Qwen2ForSequenceClassification(metaclass=DummyObject): requires_backends(self, ["torch"]) +class Qwen2ForTokenClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class Qwen2Model(metaclass=DummyObject): _backends = ["torch"] @@ -6762,6 +6804,13 @@ class Qwen2MoeForSequenceClassification(metaclass=DummyObject): requires_backends(self, ["torch"]) +class Qwen2MoeForTokenClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + 
class Qwen2MoeModel(metaclass=DummyObject): _backends = ["torch"] @@ -7793,6 +7842,13 @@ class StableLmForSequenceClassification(metaclass=DummyObject): requires_backends(self, ["torch"]) +class StableLmForTokenClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class StableLmModel(metaclass=DummyObject): _backends = ["torch"] @@ -7821,6 +7877,13 @@ class Starcoder2ForSequenceClassification(metaclass=DummyObject): requires_backends(self, ["torch"]) +class Starcoder2ForTokenClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class Starcoder2Model(metaclass=DummyObject): _backends = ["torch"] diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py index 7cc2ebe7bd..8281f59993 100644 --- a/tests/models/gemma/test_modeling_gemma.py +++ b/tests/models/gemma/test_modeling_gemma.py @@ -41,7 +41,13 @@ from ...test_pipeline_mixin import PipelineTesterMixin if is_torch_available(): import torch - from transformers import GemmaForCausalLM, GemmaForSequenceClassification, GemmaModel, GemmaTokenizer + from transformers import ( + GemmaForCausalLM, + GemmaForSequenceClassification, + GemmaForTokenClassification, + GemmaModel, + GemmaTokenizer, + ) class GemmaModelTester: @@ -284,12 +290,17 @@ class GemmaModelTester: @require_torch class GemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): - all_model_classes = (GemmaModel, GemmaForCausalLM, GemmaForSequenceClassification) if is_torch_available() else () + all_model_classes = ( + (GemmaModel, GemmaForCausalLM, GemmaForSequenceClassification, GemmaForTokenClassification) + if is_torch_available() + else () + ) all_generative_model_classes = (GemmaForCausalLM,) if is_torch_available() else () pipeline_model_mapping = ( { "feature-extraction": GemmaModel, "text-classification": GemmaForSequenceClassification, + "token-classification": GemmaForTokenClassification, "text-generation": GemmaForCausalLM, "zero-shot": GemmaForSequenceClassification, } @@ -370,6 +381,22 @@ class GemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) + # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_llama_token_classification_model with Llama->Gemma,llama->Gemma + def test_Gemma_token_classification_model(self): + config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.num_labels = 3 + input_ids = input_dict["input_ids"] + attention_mask = input_ids.ne(1).to(torch_device) + token_labels = ids_tensor([self.model_tester.batch_size, self.model_tester.seq_length], config.num_labels) + model = GemmaForTokenClassification(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=attention_mask, labels=token_labels) + self.assertEqual( + result.logits.shape, + (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_labels), + ) + @unittest.skip("Gemma buffers include complex numbers, which breaks this test") def test_save_load_fast_init_from_base(self): pass diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index 9fcdcc6d7e..58269d62e0 100644 --- 
a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -47,6 +47,7 @@ if is_torch_available(): LlamaForCausalLM, LlamaForQuestionAnswering, LlamaForSequenceClassification, + LlamaForTokenClassification, LlamaModel, LlamaTokenizer, ) @@ -286,7 +287,13 @@ class LlamaModelTester: @require_torch class LlamaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = ( - (LlamaModel, LlamaForCausalLM, LlamaForSequenceClassification, LlamaForQuestionAnswering) + ( + LlamaModel, + LlamaForCausalLM, + LlamaForSequenceClassification, + LlamaForQuestionAnswering, + LlamaForTokenClassification, + ) if is_torch_available() else () ) @@ -298,6 +305,7 @@ class LlamaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi "text-generation": LlamaForCausalLM, "zero-shot": LlamaForSequenceClassification, "question-answering": LlamaForQuestionAnswering, + "token-classification": LlamaForTokenClassification, } if is_torch_available() else {} @@ -370,6 +378,21 @@ class LlamaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) + def test_llama_token_classification_model(self): + config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.num_labels = 3 + input_ids = input_dict["input_ids"] + attention_mask = input_ids.ne(1).to(torch_device) + token_labels = ids_tensor([self.model_tester.batch_size, self.model_tester.seq_length], config.num_labels) + model = LlamaForTokenClassification(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=attention_mask, labels=token_labels) + self.assertEqual( + result.logits.shape, + (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_labels), + ) + @unittest.skip("Llama buffers include complex numbers, which breaks this test") def test_save_load_fast_init_from_base(self): pass diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py index bbc36c050e..10dcb3bf4d 100644 --- a/tests/models/mistral/test_modeling_mistral.py +++ b/tests/models/mistral/test_modeling_mistral.py @@ -46,6 +46,7 @@ if is_torch_available(): from transformers import ( MistralForCausalLM, MistralForSequenceClassification, + MistralForTokenClassification, MistralModel, ) @@ -288,13 +289,16 @@ class MistralModelTester: @require_torch class MistralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = ( - (MistralModel, MistralForCausalLM, MistralForSequenceClassification) if is_torch_available() else () + (MistralModel, MistralForCausalLM, MistralForSequenceClassification, MistralForTokenClassification) + if is_torch_available() + else () ) all_generative_model_classes = (MistralForCausalLM,) if is_torch_available() else () pipeline_model_mapping = ( { "feature-extraction": MistralModel, "text-classification": MistralForSequenceClassification, + "token-classification": MistralForTokenClassification, "text-generation": MistralForCausalLM, "zero-shot": MistralForSequenceClassification, } @@ -376,6 +380,22 @@ class MistralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) self.assertEqual(result.logits.shape, 
(self.model_tester.batch_size, self.model_tester.num_labels)) + # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_llama_token_classification_model with Llama->Mistral,llama->Mistral + def test_Mistral_token_classification_model(self): + config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.num_labels = 3 + input_ids = input_dict["input_ids"] + attention_mask = input_ids.ne(1).to(torch_device) + token_labels = ids_tensor([self.model_tester.batch_size, self.model_tester.seq_length], config.num_labels) + model = MistralForTokenClassification(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=attention_mask, labels=token_labels) + self.assertEqual( + result.logits.shape, + (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_labels), + ) + @unittest.skip("Mistral buffers include complex numbers, which breaks this test") def test_save_load_fast_init_from_base(self): pass diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py index 0d92595d8c..0972207fce 100644 --- a/tests/models/mixtral/test_modeling_mixtral.py +++ b/tests/models/mixtral/test_modeling_mixtral.py @@ -40,7 +40,12 @@ from ...test_pipeline_mixin import PipelineTesterMixin if is_torch_available(): import torch - from transformers import MixtralForCausalLM, MixtralForSequenceClassification, MixtralModel + from transformers import ( + MixtralForCausalLM, + MixtralForSequenceClassification, + MixtralForTokenClassification, + MixtralModel, + ) class MixtralModelTester: @@ -287,13 +292,16 @@ class MixtralModelTester: # Copied from tests.models.mistral.test_modeling_mistral.MistralModelTest with Mistral->Mixtral class MixtralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = ( - (MixtralModel, MixtralForCausalLM, MixtralForSequenceClassification) if is_torch_available() else () + (MixtralModel, MixtralForCausalLM, MixtralForSequenceClassification, MixtralForTokenClassification) + if is_torch_available() + else () ) all_generative_model_classes = (MixtralForCausalLM,) if is_torch_available() else () pipeline_model_mapping = ( { "feature-extraction": MixtralModel, "text-classification": MixtralForSequenceClassification, + "token-classification": MixtralForTokenClassification, "text-generation": MixtralForCausalLM, "zero-shot": MixtralForSequenceClassification, } @@ -375,6 +383,22 @@ class MixtralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) + # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_llama_token_classification_model with Llama->Mixtral,llama->Mixtral + def test_Mixtral_token_classification_model(self): + config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.num_labels = 3 + input_ids = input_dict["input_ids"] + attention_mask = input_ids.ne(1).to(torch_device) + token_labels = ids_tensor([self.model_tester.batch_size, self.model_tester.seq_length], config.num_labels) + model = MixtralForTokenClassification(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=attention_mask, labels=token_labels) + self.assertEqual( + result.logits.shape, + (self.model_tester.batch_size, 
self.model_tester.seq_length, self.model_tester.num_labels), + ) + @unittest.skip("Mixtral buffers include complex numbers, which breaks this test") def test_save_load_fast_init_from_base(self): pass diff --git a/tests/models/persimmon/test_modeling_persimmon.py b/tests/models/persimmon/test_modeling_persimmon.py index 86a69d774f..46a650c55a 100644 --- a/tests/models/persimmon/test_modeling_persimmon.py +++ b/tests/models/persimmon/test_modeling_persimmon.py @@ -44,6 +44,7 @@ if is_torch_available(): AutoTokenizer, PersimmonForCausalLM, PersimmonForSequenceClassification, + PersimmonForTokenClassification, PersimmonModel, ) from transformers.models.persimmon.modeling_persimmon import ( @@ -283,12 +284,15 @@ class PersimmonModelTester: @require_torch class PersimmonModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = ( - (PersimmonModel, PersimmonForCausalLM, PersimmonForSequenceClassification) if is_torch_available() else () + (PersimmonModel, PersimmonForCausalLM, PersimmonForSequenceClassification, PersimmonForTokenClassification) + if is_torch_available() + else () ) pipeline_model_mapping = ( { "feature-extraction": PersimmonModel, "text-classification": PersimmonForSequenceClassification, + "token-classification": PersimmonForTokenClassification, # TODO (ydshieh): check why these two fail. Fix them or skip them in a better way. # "text-generation": PersimmonForCausalLM, # "zero-shot": PersimmonForSequenceClassification, @@ -365,6 +369,22 @@ class PersimmonModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) + # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_llama_token_classification_model with Llama->Persimmon,llama->persimmon + def test_persimmon_token_classification_model(self): + config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.num_labels = 3 + input_ids = input_dict["input_ids"] + attention_mask = input_ids.ne(1).to(torch_device) + token_labels = ids_tensor([self.model_tester.batch_size, self.model_tester.seq_length], config.num_labels) + model = PersimmonForTokenClassification(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=attention_mask, labels=token_labels) + self.assertEqual( + result.logits.shape, + (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_labels), + ) + @unittest.skip("Persimmon buffers include complex numbers, which breaks this test") # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_save_load_fast_init_from_base def test_save_load_fast_init_from_base(self): diff --git a/tests/models/qwen2/test_modeling_qwen2.py b/tests/models/qwen2/test_modeling_qwen2.py index f4e88a97f0..54718c4303 100644 --- a/tests/models/qwen2/test_modeling_qwen2.py +++ b/tests/models/qwen2/test_modeling_qwen2.py @@ -45,6 +45,7 @@ if is_torch_available(): from transformers import ( Qwen2ForCausalLM, Qwen2ForSequenceClassification, + Qwen2ForTokenClassification, Qwen2Model, ) @@ -299,12 +300,17 @@ class Qwen2ModelTester: @require_torch # Copied from tests.models.mistral.test_modeling_mistral.MistralModelTest with Mistral->Qwen2 class Qwen2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): - all_model_classes = (Qwen2Model, 
Qwen2ForCausalLM, Qwen2ForSequenceClassification) if is_torch_available() else () + all_model_classes = ( + (Qwen2Model, Qwen2ForCausalLM, Qwen2ForSequenceClassification, Qwen2ForTokenClassification) + if is_torch_available() + else () + ) all_generative_model_classes = (Qwen2ForCausalLM,) if is_torch_available() else () pipeline_model_mapping = ( { "feature-extraction": Qwen2Model, "text-classification": Qwen2ForSequenceClassification, + "token-classification": Qwen2ForTokenClassification, "text-generation": Qwen2ForCausalLM, "zero-shot": Qwen2ForSequenceClassification, } @@ -387,6 +393,22 @@ class Qwen2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) + # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_llama_token_classification_model with Llama->Qwen2,llama->Qwen2 + def test_Qwen2_token_classification_model(self): + config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.num_labels = 3 + input_ids = input_dict["input_ids"] + attention_mask = input_ids.ne(1).to(torch_device) + token_labels = ids_tensor([self.model_tester.batch_size, self.model_tester.seq_length], config.num_labels) + model = Qwen2ForTokenClassification(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=attention_mask, labels=token_labels) + self.assertEqual( + result.logits.shape, + (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_labels), + ) + @unittest.skip("Qwen2 buffers include complex numbers, which breaks this test") def test_save_load_fast_init_from_base(self): pass diff --git a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py index f0818e680d..48c6ccf78a 100644 --- a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py +++ b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py @@ -45,6 +45,7 @@ if is_torch_available(): from transformers import ( Qwen2MoeForCausalLM, Qwen2MoeForSequenceClassification, + Qwen2MoeForTokenClassification, Qwen2MoeModel, ) @@ -327,13 +328,16 @@ class Qwen2MoeModelTester: # Copied from tests.models.mistral.test_modeling_mistral.MistralModelTest with Mistral->Qwen2Moe class Qwen2MoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = ( - (Qwen2MoeModel, Qwen2MoeForCausalLM, Qwen2MoeForSequenceClassification) if is_torch_available() else () + (Qwen2MoeModel, Qwen2MoeForCausalLM, Qwen2MoeForSequenceClassification, Qwen2MoeForTokenClassification) + if is_torch_available() + else () ) all_generative_model_classes = (Qwen2MoeForCausalLM,) if is_torch_available() else () pipeline_model_mapping = ( { "feature-extraction": Qwen2MoeModel, "text-classification": Qwen2MoeForSequenceClassification, + "token-classification": Qwen2MoeForTokenClassification, "text-generation": Qwen2MoeForCausalLM, "zero-shot": Qwen2MoeForSequenceClassification, } @@ -414,6 +418,22 @@ class Qwen2MoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) + # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_llama_token_classification_model with Llama->Qwen2Moe,llama->Qwen2Moe + 
def test_Qwen2Moe_token_classification_model(self): + config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.num_labels = 3 + input_ids = input_dict["input_ids"] + attention_mask = input_ids.ne(1).to(torch_device) + token_labels = ids_tensor([self.model_tester.batch_size, self.model_tester.seq_length], config.num_labels) + model = Qwen2MoeForTokenClassification(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=attention_mask, labels=token_labels) + self.assertEqual( + result.logits.shape, + (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_labels), + ) + @unittest.skip("Qwen2Moe buffers include complex numbers, which breaks this test") def test_save_load_fast_init_from_base(self): pass diff --git a/tests/models/stablelm/test_modeling_stablelm.py b/tests/models/stablelm/test_modeling_stablelm.py index f5e74ead9b..083f928612 100644 --- a/tests/models/stablelm/test_modeling_stablelm.py +++ b/tests/models/stablelm/test_modeling_stablelm.py @@ -43,6 +43,7 @@ if is_torch_available(): AutoTokenizer, StableLmForCausalLM, StableLmForSequenceClassification, + StableLmForTokenClassification, StableLmModel, ) from transformers.models.stablelm.modeling_stablelm import ( @@ -287,12 +288,15 @@ class StableLmModelTester: # Copied from transformers.tests.persimmon.test_modeling_persimmon.PersimmonModelTest with Persimmon -> StableLm class StableLmModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = ( - (StableLmModel, StableLmForCausalLM, StableLmForSequenceClassification) if is_torch_available() else () + (StableLmModel, StableLmForCausalLM, StableLmForSequenceClassification, StableLmForTokenClassification) + if is_torch_available() + else () ) pipeline_model_mapping = ( { "feature-extraction": StableLmModel, "text-classification": StableLmForSequenceClassification, + "token-classification": StableLmForTokenClassification, # TODO (ydshieh): check why these two fail. Fix them or skip them in a better way. 
# "text-generation": StableLmForCausalLM, # "zero-shot": StableLmForSequenceClassification, @@ -356,6 +360,22 @@ class StableLmModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) + # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_llama_token_classification_model with Llama->StableLm,llama->stablelm + def test_stablelm_token_classification_model(self): + config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.num_labels = 3 + input_ids = input_dict["input_ids"] + attention_mask = input_ids.ne(1).to(torch_device) + token_labels = ids_tensor([self.model_tester.batch_size, self.model_tester.seq_length], config.num_labels) + model = StableLmForTokenClassification(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=attention_mask, labels=token_labels) + self.assertEqual( + result.logits.shape, + (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_labels), + ) + @parameterized.expand([("linear",), ("dynamic",)]) # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_model_rope_scaling_from_config with Llama->StableLm def test_model_rope_scaling_from_config(self, scaling_type): diff --git a/tests/models/starcoder2/test_modeling_starcoder2.py b/tests/models/starcoder2/test_modeling_starcoder2.py index 95f604d06b..faba4d254b 100644 --- a/tests/models/starcoder2/test_modeling_starcoder2.py +++ b/tests/models/starcoder2/test_modeling_starcoder2.py @@ -43,6 +43,7 @@ if is_torch_available(): AutoTokenizer, Starcoder2ForCausalLM, Starcoder2ForSequenceClassification, + Starcoder2ForTokenClassification, Starcoder2Model, ) @@ -290,13 +291,16 @@ class Starcoder2ModelTester: # Copied from transformers.tests.models.mistral.test_modeling_mistral.MistralModelTest with Mistral->Starcoder2 class Starcoder2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = ( - (Starcoder2Model, Starcoder2ForCausalLM, Starcoder2ForSequenceClassification) if is_torch_available() else () + (Starcoder2Model, Starcoder2ForCausalLM, Starcoder2ForSequenceClassification, Starcoder2ForTokenClassification) + if is_torch_available() + else () ) all_generative_model_classes = (Starcoder2ForCausalLM,) if is_torch_available() else () pipeline_model_mapping = ( { "feature-extraction": Starcoder2Model, "text-classification": Starcoder2ForSequenceClassification, + "token-classification": Starcoder2ForTokenClassification, "text-generation": Starcoder2ForCausalLM, "zero-shot": Starcoder2ForSequenceClassification, } @@ -370,6 +374,22 @@ class Starcoder2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTeste result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) + # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_llama_token_classification_model with Llama->Starcoder2,llama->Starcoder2 + def test_Starcoder2_token_classification_model(self): + config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.num_labels = 3 + input_ids = input_dict["input_ids"] + attention_mask = input_ids.ne(1).to(torch_device) + token_labels = ids_tensor([self.model_tester.batch_size, 
self.model_tester.seq_length], config.num_labels) + model = Starcoder2ForTokenClassification(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=attention_mask, labels=token_labels) + self.assertEqual( + result.logits.shape, + (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_labels), + ) + @unittest.skip("Starcoder2 buffers include complex numbers, which breaks this test") def test_save_load_fast_init_from_base(self): pass From fce78fd0e917312da36cea4d352896a8f6ec9cdc Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Mon, 20 May 2024 10:08:26 +0200 Subject: [PATCH 02/80] FIX / Quantization: Fix Dockerfile build (#30890) * Update Dockerfile * Update docker/transformers-quantization-latest-gpu/Dockerfile --- docker/transformers-quantization-latest-gpu/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile index 0fd441bc05..2b74dca91f 100755 --- a/docker/transformers-quantization-latest-gpu/Dockerfile +++ b/docker/transformers-quantization-latest-gpu/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 +FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 LABEL maintainer="Hugging Face" ARG DEBIAN_FRONTEND=noninteractive From cd6bd0af3435ab731f7ed1147def5fae3ffc941e Mon Sep 17 00:00:00 2001 From: Benjamin Warner Date: Mon, 20 May 2024 03:36:57 -0500 Subject: [PATCH 03/80] Add support for torch.compile dynamic shapes (#30560) * add torch.compile dynamic support * Add SDPA dynamic shapes compile test & improve SDPA comment * comment consistency --- src/transformers/models/bart/modeling_bart.py | 8 +++- src/transformers/models/bert/modeling_bert.py | 8 ++-- .../models/cohere/modeling_cohere.py | 4 +- .../data2vec/modeling_data2vec_audio.py | 8 +++- .../models/falcon/modeling_falcon.py | 22 ++++++---- .../models/gemma/modeling_gemma.py | 4 +- .../gpt_bigcode/modeling_gpt_bigcode.py | 9 +++- .../models/hubert/modeling_hubert.py | 8 +++- .../models/idefics/modeling_idefics.py | 12 ++++-- .../models/llama/modeling_llama.py | 4 +- .../models/mistral/modeling_mistral.py | 8 +++- .../models/mixtral/modeling_mixtral.py | 8 +++- .../models/musicgen/modeling_musicgen.py | 8 +++- .../modeling_musicgen_melody.py | 8 +++- src/transformers/models/olmo/modeling_olmo.py | 4 +- src/transformers/models/phi/modeling_phi.py | 6 ++- src/transformers/models/phi3/modeling_phi3.py | 8 +++- .../models/qwen2/modeling_qwen2.py | 8 +++- .../models/qwen2_moe/modeling_qwen2_moe.py | 8 +++- src/transformers/models/sew/modeling_sew.py | 8 +++- .../models/stablelm/modeling_stablelm.py | 8 +++- .../models/starcoder2/modeling_starcoder2.py | 8 +++- .../models/unispeech/modeling_unispeech.py | 8 +++- .../unispeech_sat/modeling_unispeech_sat.py | 8 +++- .../models/wav2vec2/modeling_wav2vec2.py | 8 +++- .../models/whisper/modeling_whisper.py | 8 +++- tests/test_modeling_common.py | 41 +++++++++++++++++++ 27 files changed, 190 insertions(+), 60 deletions(-) diff --git a/src/transformers/models/bart/modeling_bart.py b/src/transformers/models/bart/modeling_bart.py index 33fea9a018..bcc9ac3e07 100755 --- a/src/transformers/models/bart/modeling_bart.py +++ b/src/transformers/models/bart/modeling_bart.py @@ -592,6 +592,11 @@ class BartSdpaAttention(BartAttention): query_states = self._shape(query_states, tgt_len, bsz) + # We dispatch to SDPA's Flash 
Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. + is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False + # NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask, # but we are fine here as `_shape` do call `.contiguous()`. Reference: https://github.com/pytorch/pytorch/issues/112577 attn_output = torch.nn.functional.scaled_dot_product_attention( @@ -600,8 +605,7 @@ class BartSdpaAttention(BartAttention): value_states, attn_mask=attention_mask, dropout_p=self.dropout if self.training else 0.0, - # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. - is_causal=self.is_causal and attention_mask is None and tgt_len > 1, + is_causal=is_causal, ) if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): diff --git a/src/transformers/models/bert/modeling_bert.py b/src/transformers/models/bert/modeling_bert.py index b516a97187..033dc6ba66 100755 --- a/src/transformers/models/bert/modeling_bert.py +++ b/src/transformers/models/bert/modeling_bert.py @@ -428,9 +428,11 @@ class BertSdpaSelfAttention(BertSelfAttention): key_layer = key_layer.contiguous() value_layer = value_layer.contiguous() - # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal - # mask in case tgt_len == 1. - is_causal = self.is_decoder and attention_mask is None and tgt_len > 1 + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create + # a causal mask in case tgt_len == 1. + is_causal = True if self.is_decoder and attention_mask is None and tgt_len > 1 else False attn_output = torch.nn.functional.scaled_dot_product_attention( query_layer, diff --git a/src/transformers/models/cohere/modeling_cohere.py b/src/transformers/models/cohere/modeling_cohere.py index d6f37af8da..41c4e151a3 100644 --- a/src/transformers/models/cohere/modeling_cohere.py +++ b/src/transformers/models/cohere/modeling_cohere.py @@ -587,8 +587,8 @@ class CohereSdpaAttention(CohereAttention): key_states = key_states.contiguous() value_states = value_states.contiguous() - # We dispatch to SDPA's Flash Attention or Efficient kernels via this if statement instead of an - # inline conditional assignment to support both torch.compile's `dynamic=True` and `fullgraph=True` + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. 
is_causal = True if causal_mask is None and q_len > 1 else False attn_output = torch.nn.functional.scaled_dot_product_attention( diff --git a/src/transformers/models/data2vec/modeling_data2vec_audio.py b/src/transformers/models/data2vec/modeling_data2vec_audio.py index e77bc728ab..8837c278c5 100755 --- a/src/transformers/models/data2vec/modeling_data2vec_audio.py +++ b/src/transformers/models/data2vec/modeling_data2vec_audio.py @@ -788,6 +788,11 @@ class Data2VecAudioSdpaAttention(Data2VecAudioAttention): query_states = self._shape(query_states, tgt_len, bsz) + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. + is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False + # NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask, # but we are fine here as `_shape` do call `.contiguous()`. Reference: https://github.com/pytorch/pytorch/issues/112577 attn_output = torch.nn.functional.scaled_dot_product_attention( @@ -796,8 +801,7 @@ class Data2VecAudioSdpaAttention(Data2VecAudioAttention): value_states, attn_mask=attention_mask, dropout_p=self.dropout if self.training else 0.0, - # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. - is_causal=self.is_causal and attention_mask is None and tgt_len > 1, + is_causal=is_causal, ) if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): diff --git a/src/transformers/models/falcon/modeling_falcon.py b/src/transformers/models/falcon/modeling_falcon.py index 0a14fcb37b..b9fbf8d70b 100644 --- a/src/transformers/models/falcon/modeling_falcon.py +++ b/src/transformers/models/falcon/modeling_falcon.py @@ -434,16 +434,19 @@ class FalconAttention(nn.Module): if alibi is None: if self._use_sdpa and not output_attentions: - attn_output = F.scaled_dot_product_attention( + # We dispatch to SDPA's Flash Attention or Efficient kernels via this if statement instead of an + # inline conditional assignment to support both torch.compile's `dynamic=True` and `fullgraph=True` + # The query_length > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not + # create a causal mask in case query_length == 1. + is_causal = True if self.is_causal and attention_mask is None and query_length > 1 else False + attn_output = torch.nn.functional.scaled_dot_product_attention( query_layer, key_layer, value_layer, - attention_mask, - 0.0, - # The query_length > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case query_length == 1. 
- is_causal=self.is_causal and attention_mask is None and query_length > 1, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=is_causal, ) - attention_scores = None else: attention_scores = query_layer @ key_layer.transpose(-1, -2) @@ -466,13 +469,16 @@ class FalconAttention(nn.Module): else: if self._use_sdpa and not output_attentions and head_mask is None: - attn_output = F.scaled_dot_product_attention( + # We dispatch to SDPA's Flash Attention or Efficient kernels via this if statement instead of an + # inline conditional assignment to support both torch.compile's `dynamic=True` and `fullgraph=True` + is_causal = True if self.is_causal and attention_mask is None and query_length > 1 else False + attn_output = torch.nn.functional.scaled_dot_product_attention( query_layer, key_layer, value_layer, attn_mask=attention_mask, dropout_p=self.attention_dropout.p if self.training else 0.0, - is_causal=self.is_causal and attention_mask is None and query_length > 1, + is_causal=is_causal, ) attn_output = attn_output.transpose(1, 2) attn_output = attn_output.reshape(batch_size, query_length, self.num_heads * self.head_dim) diff --git a/src/transformers/models/gemma/modeling_gemma.py b/src/transformers/models/gemma/modeling_gemma.py index ec88074ad5..200f91f750 100644 --- a/src/transformers/models/gemma/modeling_gemma.py +++ b/src/transformers/models/gemma/modeling_gemma.py @@ -571,8 +571,8 @@ class GemmaSdpaAttention(GemmaAttention): key_states = key_states.contiguous() value_states = value_states.contiguous() - # We dispatch to SDPA's Flash Attention or Efficient kernels via this if statement instead of an - # inline conditional assignment to support both torch.compile's `dynamic=True` and `fullgraph=True` + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. is_causal = True if causal_mask is None and q_len > 1 else False attn_output = torch.nn.functional.scaled_dot_product_attention( diff --git a/src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py b/src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py index 37ed2aba62..7b7bfaf1d4 100644 --- a/src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +++ b/src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py @@ -549,14 +549,19 @@ class GPTBigCodeSdpaAttention(GPTBigCodeAttention): key = key.contiguous() value = value.contiguous() + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The query_length > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not + # create a causal mask in case query_length == 1. + is_causal = True if self.is_causal and attention_mask is None and query_length > 1 else False + sdpa_result = torch.nn.functional.scaled_dot_product_attention( query, key, value, attn_mask=attention_mask, dropout_p=self.attn_pdrop if self.training else 0.0, - # The query_length > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case query_length == 1. 
- is_causal=self.is_causal and attention_mask is None and query_length > 1, + is_causal=is_causal, scale=scale, ) diff --git a/src/transformers/models/hubert/modeling_hubert.py b/src/transformers/models/hubert/modeling_hubert.py index 3d1d0884c6..83d90f5ded 100755 --- a/src/transformers/models/hubert/modeling_hubert.py +++ b/src/transformers/models/hubert/modeling_hubert.py @@ -852,6 +852,11 @@ class HubertSdpaAttention(HubertAttention): query_states = self._shape(query_states, tgt_len, bsz) + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. + is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False + # NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask, # but we are fine here as `_shape` do call `.contiguous()`. Reference: https://github.com/pytorch/pytorch/issues/112577 attn_output = torch.nn.functional.scaled_dot_product_attention( @@ -860,8 +865,7 @@ class HubertSdpaAttention(HubertAttention): value_states, attn_mask=attention_mask, dropout_p=self.dropout if self.training else 0.0, - # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. - is_causal=self.is_causal and attention_mask is None and tgt_len > 1, + is_causal=is_causal, ) if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): diff --git a/src/transformers/models/idefics/modeling_idefics.py b/src/transformers/models/idefics/modeling_idefics.py index 622e336fe4..68ec6ab8a5 100644 --- a/src/transformers/models/idefics/modeling_idefics.py +++ b/src/transformers/models/idefics/modeling_idefics.py @@ -660,14 +660,18 @@ class IdeficsAttention(nn.Module): key_states = key_states.contiguous() value_states = value_states.contiguous() - attn_output = nn.functional.scaled_dot_product_attention( + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. + is_causal = True if self.is_causal and attention_mask is None and q_len > 1 else False + + attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, key_states, value_states, attn_mask=attention_mask, - dropout_p=self.dropout, - # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. 
- is_causal=self.is_causal and attention_mask is None and q_len > 1, + dropout_p=self.dropout if self.training else 0.0, + is_causal=is_causal, ) if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): diff --git a/src/transformers/models/llama/modeling_llama.py b/src/transformers/models/llama/modeling_llama.py index 56a83c178c..5d8a3f987a 100644 --- a/src/transformers/models/llama/modeling_llama.py +++ b/src/transformers/models/llama/modeling_llama.py @@ -643,8 +643,8 @@ class LlamaSdpaAttention(LlamaAttention): key_states = key_states.contiguous() value_states = value_states.contiguous() - # We dispatch to SDPA's Flash Attention or Efficient kernels via this if statement instead of an - # inline conditional assignment to support both torch.compile's `dynamic=True` and `fullgraph=True` + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. is_causal = True if causal_mask is None and q_len > 1 else False attn_output = torch.nn.functional.scaled_dot_product_attention( diff --git a/src/transformers/models/mistral/modeling_mistral.py b/src/transformers/models/mistral/modeling_mistral.py index cc5d8f0862..a524c5958e 100644 --- a/src/transformers/models/mistral/modeling_mistral.py +++ b/src/transformers/models/mistral/modeling_mistral.py @@ -676,14 +676,18 @@ class MistralSdpaAttention(MistralAttention): key_states = key_states.contiguous() value_states = value_states.contiguous() + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. + is_causal = True if self.is_causal and attention_mask is None and q_len > 1 else False + attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, key_states, value_states, attn_mask=attention_mask, dropout_p=self.attention_dropout if self.training else 0.0, - # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. - is_causal=self.is_causal and attention_mask is None and q_len > 1, + is_causal=is_causal, ) attn_output = attn_output.transpose(1, 2).contiguous() diff --git a/src/transformers/models/mixtral/modeling_mixtral.py b/src/transformers/models/mixtral/modeling_mixtral.py index 7bafe49905..dfdd114728 100644 --- a/src/transformers/models/mixtral/modeling_mixtral.py +++ b/src/transformers/models/mixtral/modeling_mixtral.py @@ -749,14 +749,18 @@ class MixtralSdpaAttention(MixtralAttention): key_states = key_states.contiguous() value_states = value_states.contiguous() + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. 
+ is_causal = True if self.is_causal and attention_mask is None and q_len > 1 else False + attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, key_states, value_states, attn_mask=attention_mask, dropout_p=self.attention_dropout if self.training else 0.0, - # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. - is_causal=self.is_causal and attention_mask is None and q_len > 1, + is_causal=is_causal, ) attn_output = attn_output.transpose(1, 2).contiguous() diff --git a/src/transformers/models/musicgen/modeling_musicgen.py b/src/transformers/models/musicgen/modeling_musicgen.py index 8e8b1fe284..d6f0ae96f4 100644 --- a/src/transformers/models/musicgen/modeling_musicgen.py +++ b/src/transformers/models/musicgen/modeling_musicgen.py @@ -618,6 +618,11 @@ class MusicgenSdpaAttention(MusicgenAttention): query_states = self._shape(query_states, tgt_len, bsz) + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. + is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False + # NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask, # but we are fine here as `_shape` do call `.contiguous()`. Reference: https://github.com/pytorch/pytorch/issues/112577 attn_output = torch.nn.functional.scaled_dot_product_attention( @@ -626,8 +631,7 @@ class MusicgenSdpaAttention(MusicgenAttention): value_states, attn_mask=attention_mask, dropout_p=self.dropout if self.training else 0.0, - # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. - is_causal=self.is_causal and attention_mask is None and tgt_len > 1, + is_causal=is_causal, ) if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): diff --git a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py index 9865a4b917..6458df0a1b 100644 --- a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py +++ b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py @@ -634,6 +634,11 @@ class MusicgenMelodySdpaAttention(MusicgenMelodyAttention): query_states = self._shape(query_states, tgt_len, bsz) + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. + is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False + # NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask, # but we are fine here as `_shape` do call `.contiguous()`. 
Reference: https://github.com/pytorch/pytorch/issues/112577 attn_output = torch.nn.functional.scaled_dot_product_attention( @@ -642,8 +647,7 @@ class MusicgenMelodySdpaAttention(MusicgenMelodyAttention): value_states, attn_mask=attention_mask, dropout_p=self.dropout if self.training else 0.0, - # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. - is_causal=self.is_causal and attention_mask is None and tgt_len > 1, + is_causal=is_causal, ) if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): diff --git a/src/transformers/models/olmo/modeling_olmo.py b/src/transformers/models/olmo/modeling_olmo.py index 8bfa3dc606..9b4b08239b 100644 --- a/src/transformers/models/olmo/modeling_olmo.py +++ b/src/transformers/models/olmo/modeling_olmo.py @@ -617,8 +617,8 @@ class OlmoSdpaAttention(OlmoAttention): key_states = key_states.contiguous() value_states = value_states.contiguous() - # We dispatch to SDPA's Flash Attention or Efficient kernels via this if statement instead of an - # inline conditional assignment to support both torch.compile's `dynamic=True` and `fullgraph=True` + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. is_causal = True if causal_mask is None and q_len > 1 else False attn_output = torch.nn.functional.scaled_dot_product_attention( diff --git a/src/transformers/models/phi/modeling_phi.py b/src/transformers/models/phi/modeling_phi.py index 795ff18e5b..1f82b09a25 100644 --- a/src/transformers/models/phi/modeling_phi.py +++ b/src/transformers/models/phi/modeling_phi.py @@ -709,13 +709,17 @@ class PhiSdpaAttention(PhiAttention): key_states = key_states.contiguous() value_states = value_states.contiguous() + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + is_causal = True if self.is_causal and attention_mask is None and q_len > 1 else False + attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, key_states, value_states, attn_mask=attention_mask, dropout_p=self.attention_dropout if self.training else 0.0, - is_causal=self.is_causal and attention_mask is None and q_len > 1, + is_causal=is_causal, ) attn_output = attn_output.transpose(1, 2).contiguous() diff --git a/src/transformers/models/phi3/modeling_phi3.py b/src/transformers/models/phi3/modeling_phi3.py index 41765632b5..224aad0085 100644 --- a/src/transformers/models/phi3/modeling_phi3.py +++ b/src/transformers/models/phi3/modeling_phi3.py @@ -778,14 +778,18 @@ class Phi3SdpaAttention(Phi3Attention): key_states = key_states.contiguous() value_states = value_states.contiguous() + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. 
+ is_causal = True if self.is_causal and attention_mask is None and q_len > 1 else False + attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, key_states, value_states, attn_mask=attention_mask, dropout_p=self.attention_dropout if self.training else 0.0, - # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. - is_causal=self.is_causal and attention_mask is None and q_len > 1, + is_causal=is_causal, ) attn_output = attn_output.transpose(1, 2).contiguous() diff --git a/src/transformers/models/qwen2/modeling_qwen2.py b/src/transformers/models/qwen2/modeling_qwen2.py index c9b8bd8fde..f90d61d6e6 100644 --- a/src/transformers/models/qwen2/modeling_qwen2.py +++ b/src/transformers/models/qwen2/modeling_qwen2.py @@ -681,14 +681,18 @@ class Qwen2SdpaAttention(Qwen2Attention): key_states = key_states.contiguous() value_states = value_states.contiguous() + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. + is_causal = True if self.is_causal and attention_mask is None and q_len > 1 else False + attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, key_states, value_states, attn_mask=attention_mask, dropout_p=self.attention_dropout if self.training else 0.0, - # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. - is_causal=self.is_causal and attention_mask is None and q_len > 1, + is_causal=is_causal, ) attn_output = attn_output.transpose(1, 2).contiguous() diff --git a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py index f8d1bf6bb4..21d8d608ee 100644 --- a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py +++ b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py @@ -759,14 +759,18 @@ class Qwen2MoeSdpaAttention(Qwen2MoeAttention): key_states = key_states.contiguous() value_states = value_states.contiguous() + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. + is_causal = True if self.is_causal and attention_mask is None and q_len > 1 else False + attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, key_states, value_states, attn_mask=attention_mask, dropout_p=self.attention_dropout if self.training else 0.0, - # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. 
- is_causal=self.is_causal and attention_mask is None and q_len > 1, + is_causal=is_causal, ) attn_output = attn_output.transpose(1, 2).contiguous() diff --git a/src/transformers/models/sew/modeling_sew.py b/src/transformers/models/sew/modeling_sew.py index 60cbb777c7..199d0e543a 100644 --- a/src/transformers/models/sew/modeling_sew.py +++ b/src/transformers/models/sew/modeling_sew.py @@ -852,6 +852,11 @@ class SEWSdpaAttention(SEWAttention): query_states = self._shape(query_states, tgt_len, bsz) + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. + is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False + # NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask, # but we are fine here as `_shape` do call `.contiguous()`. Reference: https://github.com/pytorch/pytorch/issues/112577 attn_output = torch.nn.functional.scaled_dot_product_attention( @@ -860,8 +865,7 @@ class SEWSdpaAttention(SEWAttention): value_states, attn_mask=attention_mask, dropout_p=self.dropout if self.training else 0.0, - # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. - is_causal=self.is_causal and attention_mask is None and tgt_len > 1, + is_causal=is_causal, ) if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): diff --git a/src/transformers/models/stablelm/modeling_stablelm.py b/src/transformers/models/stablelm/modeling_stablelm.py index 91a6e83a8b..e8d07340d3 100755 --- a/src/transformers/models/stablelm/modeling_stablelm.py +++ b/src/transformers/models/stablelm/modeling_stablelm.py @@ -487,14 +487,18 @@ class StableLmSdpaAttention(StableLmAttention): key_states = key_states.contiguous() value_states = value_states.contiguous() + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. + is_causal = True if self.is_causal and attention_mask is None and q_len > 1 else False + attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, key_states, value_states, attn_mask=attention_mask, dropout_p=self.attention_dropout.p if self.training else 0.0, - # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. 
- is_causal=self.is_causal and attention_mask is None and q_len > 1, + is_causal=is_causal, ) attn_output = attn_output.transpose(1, 2).contiguous() diff --git a/src/transformers/models/starcoder2/modeling_starcoder2.py b/src/transformers/models/starcoder2/modeling_starcoder2.py index ae64b23aa6..d808a3fd25 100644 --- a/src/transformers/models/starcoder2/modeling_starcoder2.py +++ b/src/transformers/models/starcoder2/modeling_starcoder2.py @@ -660,14 +660,18 @@ class Starcoder2SdpaAttention(Starcoder2Attention): key_states = key_states.contiguous() value_states = value_states.contiguous() + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. + is_causal = True if self.is_causal and attention_mask is None and q_len > 1 else False + attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, key_states, value_states, attn_mask=attention_mask, dropout_p=self.attention_dropout if self.training else 0.0, - # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. - is_causal=self.is_causal and attention_mask is None and q_len > 1, + is_causal=is_causal, ) attn_output = attn_output.transpose(1, 2).contiguous() diff --git a/src/transformers/models/unispeech/modeling_unispeech.py b/src/transformers/models/unispeech/modeling_unispeech.py index 5c1557fb1f..aa4bb7827e 100755 --- a/src/transformers/models/unispeech/modeling_unispeech.py +++ b/src/transformers/models/unispeech/modeling_unispeech.py @@ -888,6 +888,11 @@ class UniSpeechSdpaAttention(UniSpeechAttention): query_states = self._shape(query_states, tgt_len, bsz) + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. + is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False + # NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask, # but we are fine here as `_shape` do call `.contiguous()`. Reference: https://github.com/pytorch/pytorch/issues/112577 attn_output = torch.nn.functional.scaled_dot_product_attention( @@ -896,8 +901,7 @@ class UniSpeechSdpaAttention(UniSpeechAttention): value_states, attn_mask=attention_mask, dropout_p=self.dropout if self.training else 0.0, - # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. 
- is_causal=self.is_causal and attention_mask is None and tgt_len > 1, + is_causal=is_causal, ) if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): diff --git a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py index 853c521e5e..4663fc05d8 100755 --- a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py +++ b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py @@ -905,6 +905,11 @@ class UniSpeechSatSdpaAttention(UniSpeechSatAttention): query_states = self._shape(query_states, tgt_len, bsz) + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. + is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False + # NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask, # but we are fine here as `_shape` do call `.contiguous()`. Reference: https://github.com/pytorch/pytorch/issues/112577 attn_output = torch.nn.functional.scaled_dot_product_attention( @@ -913,8 +918,7 @@ class UniSpeechSatSdpaAttention(UniSpeechSatAttention): value_states, attn_mask=attention_mask, dropout_p=self.dropout if self.training else 0.0, - # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. - is_causal=self.is_causal and attention_mask is None and tgt_len > 1, + is_causal=is_causal, ) if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py index ec928762b5..5fb64c1f2c 100755 --- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py @@ -953,6 +953,11 @@ class Wav2Vec2SdpaAttention(Wav2Vec2Attention): query_states = self._shape(query_states, tgt_len, bsz) + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. + is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False + # NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask, # but we are fine here as `_shape` do call `.contiguous()`. Reference: https://github.com/pytorch/pytorch/issues/112577 attn_output = torch.nn.functional.scaled_dot_product_attention( @@ -961,8 +966,7 @@ class Wav2Vec2SdpaAttention(Wav2Vec2Attention): value_states, attn_mask=attention_mask, dropout_p=self.dropout if self.training else 0.0, - # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. 
- is_causal=self.is_causal and attention_mask is None and tgt_len > 1, + is_causal=is_causal, ) if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): diff --git a/src/transformers/models/whisper/modeling_whisper.py b/src/transformers/models/whisper/modeling_whisper.py index c0db404e5c..e4fda437bf 100644 --- a/src/transformers/models/whisper/modeling_whisper.py +++ b/src/transformers/models/whisper/modeling_whisper.py @@ -686,6 +686,11 @@ class WhisperSdpaAttention(WhisperAttention): query_states = self._shape(query_states, tgt_len, bsz) + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. + is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False + # NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask, # but we are fine here as `_shape` do call `.contiguous()`. Reference: https://github.com/pytorch/pytorch/issues/112577 attn_output = torch.nn.functional.scaled_dot_product_attention( @@ -694,8 +699,7 @@ class WhisperSdpaAttention(WhisperAttention): value_states, attn_mask=attention_mask, dropout_p=self.dropout if self.training else 0.0, - # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. - is_causal=self.is_causal and attention_mask is None and tgt_len > 1, + is_causal=is_causal, ) if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 533eedb7f3..5f3ac898da 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -4014,6 +4014,47 @@ class ModelTesterMixin: with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=False, enable_mem_efficient=False): _ = model(**inputs_dict) + @require_torch_sdpa + @require_torch_gpu + @slow + def test_sdpa_can_compile_dynamic(self): + compute_capability = torch.cuda.get_device_capability() + major, _ = compute_capability + + if not torch.version.cuda or major < 8: + self.skipTest("This test requires an NVIDIA GPU with compute capability >= 8.0") + + for model_class in self.all_model_classes: + if not model_class._supports_sdpa: + self.skipTest(f"{model_class.__name__} does not support SDPA") + + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + inputs_dict = self._prepare_for_class(inputs_dict, model_class) + if config.model_type in ["dbrx"]: + self.skipTest( + "DBRX (transformers==4.40) requires a modification to support dynamic shapes with compile." + ) + model = model_class(config) + + with tempfile.TemporaryDirectory() as tmpdirname: + model.save_pretrained(tmpdirname) + model = model_class.from_pretrained(tmpdirname, torch_dtype=torch.float16, attn_implementation="sdpa") + model.to(torch_device) + + # For PyTorch 2.1 - 2.3.0 set `dynamic=True`. In the future setting `dynamic=None` and using `torch._dynamo.mark_dynamic()` + # on input tensors will be required. `mark_dynamic` currently raises inconsistent shape errors. 
+ model = torch.compile(model, dynamic=True) + + inputs_dict.pop("attention_mask", None) + inputs_dict.pop("decoder_attention_mask", None) + for name, inp in inputs_dict.items(): + if isinstance(inp, torch.Tensor) and inp.dtype in [torch.float32, torch.float16]: + inputs_dict[name] = inp.to(torch.float16) + + # use no_grad to save some memory + with torch.no_grad(): + _ = model(**inputs_dict) + @require_torch_sdpa @slow def test_eager_matches_sdpa_generate(self): From 5d0bf59b4d5be72c8c956e0240a67d7c3100fdaf Mon Sep 17 00:00:00 2001 From: Raushan Turganbay Date: Mon, 20 May 2024 13:45:56 +0500 Subject: [PATCH 04/80] LLaVa-Next: Update docs with batched inference (#30857) * update docs with batch ex * Update docs/source/en/model_doc/llava_next.md Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com> * accept nested list of img --------- Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com> --- docs/source/en/model_doc/llava_next.md | 41 +++++++++++++++++++ .../llava_next/image_processing_llava_next.py | 26 +++++++++++- .../test_image_processor_llava_next.py | 18 ++++++++ 3 files changed, 84 insertions(+), 1 deletion(-) diff --git a/docs/source/en/model_doc/llava_next.md b/docs/source/en/model_doc/llava_next.md index a2a3913fca..a4a1419ee0 100644 --- a/docs/source/en/model_doc/llava_next.md +++ b/docs/source/en/model_doc/llava_next.md @@ -68,6 +68,8 @@ The original code can be found [here](https://github.com/haotian-liu/LLaVA/tree/ ## Usage example +### Single image inference + Here's how to load the model and perform inference in half-precision (`torch.float16`): ```python @@ -94,6 +96,45 @@ output = model.generate(**inputs, max_new_tokens=100) print(processor.decode(output[0], skip_special_tokens=True)) ``` +### Multi image inference + +LLaVa-Next can perform inference with multiple images as input, where images either belong to the same prompt or different prompts (in batched inference). Here is how you can do it: + +```python +import requests +from PIL import Image +import torch +from transformers import AutoProcessor, LlavaNextForConditionalGeneration + +# Load the model in half-precision +model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch.float16, device_map="auto") +processor = AutoProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf") + +# Get three different images +url = "https://www.ilankelman.org/stopsigns/australia.jpg" +image_stop = Image.open(requests.get(url, stream=True).raw) + +url = "http://images.cocodataset.org/val2017/000000039769.jpg" +image_cats = Image.open(requests.get(url, stream=True).raw) + +url = "https://huggingface.co/microsoft/kosmos-2-patch14-224/resolve/main/snowman.jpg" +image_snowman = Image.open(requests.get(url, stream=True).raw) + +# Prepare a batched prompt, where the first one is a multi-turn conversation and the second is not +prompt = [ + "[INST] \nWhat is shown in this image? [/INST] There is a red stop sign in the image. [INST] \nWhat about this image? How many cats do you see [/INST]", + "[INST] \nWhat is shown in this image? 
[/INST]" +] + +# We can simply feed images in the order they have to be used in the text prompt +# Each "" token uses one image leaving the next for the subsequent "" tokens +inputs = processor(text=prompt, images=[image_stop, image_cats, image_snowman], padding=True, return_tensors="pt").to(model.device) + +# Generate +generate_ids = model.generate(**inputs, max_new_tokens=30) +processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False) +``` + ## Model optimization ### Quantization using Bitsandbytes diff --git a/src/transformers/models/llava_next/image_processing_llava_next.py b/src/transformers/models/llava_next/image_processing_llava_next.py index 34de0f4db0..6295fb9562 100644 --- a/src/transformers/models/llava_next/image_processing_llava_next.py +++ b/src/transformers/models/llava_next/image_processing_llava_next.py @@ -37,6 +37,7 @@ from ...image_utils import ( get_image_size, infer_channel_dimension_format, is_scaled_image, + is_valid_image, make_list_of_images, to_numpy_array, valid_images, @@ -52,6 +53,29 @@ if is_vision_available(): from PIL import Image +def make_batched_images(images) -> List[List[ImageInput]]: + """ + Accepts images in list or nested list format, and makes a list of images for preprocessing. + + Args: + images (`Union[List[List[ImageInput]], List[ImageInput], ImageInput]`): + The input image. + + Returns: + list: A list of images. + """ + if isinstance(images, (list, tuple)) and isinstance(images[0], (list, tuple)) and is_valid_image(images[0][0]): + return [img for img_list in images for img in img_list] + + elif isinstance(images, (list, tuple)) and is_valid_image(images[0]): + return images + + elif is_valid_image(images): + return [images] + + raise ValueError(f"Could not make batched video from {images}") + + def divide_to_patches(image: np.array, patch_size: int, input_data_format) -> List[np.array]: """ Divides an image into patches of a specified size. 
@@ -651,7 +675,7 @@ class LlavaNextImageProcessor(BaseImageProcessor): do_pad = do_pad if do_pad is not None else self.do_pad do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb - images = make_list_of_images(images) + images = make_batched_images(images) if not valid_images(images): raise ValueError( diff --git a/tests/models/llava_next/test_image_processor_llava_next.py b/tests/models/llava_next/test_image_processor_llava_next.py index 7369f8a918..8b1f98bbca 100644 --- a/tests/models/llava_next/test_image_processor_llava_next.py +++ b/tests/models/llava_next/test_image_processor_llava_next.py @@ -199,3 +199,21 @@ class LlavaNextImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): @unittest.skip("LlavaNextImageProcessor doesn't treat 4 channel PIL and numpy consistently yet") # FIXME Amy def test_call_numpy_4_channels(self): pass + + def test_nested_input(self): + image_processing = self.image_processing_class(**self.image_processor_dict) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=True) + + # Test batched as a list of images + encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values + expected_output_image_shape = (7, 1445, 3, 18, 18) + self.assertEqual(tuple(encoded_images.shape), expected_output_image_shape) + + # Test batched as a nested list of images, where each sublist is one batch + image_inputs_nested = [image_inputs[:3], image_inputs[3:]] + encoded_images_nested = image_processing(image_inputs_nested, return_tensors="pt").pixel_values + expected_output_image_shape = (7, 1445, 3, 18, 18) + self.assertEqual(tuple(encoded_images_nested.shape), expected_output_image_shape) + + # Image processor should return same pixel values, independently of ipnut format + self.assertTrue((encoded_images_nested == encoded_images).all()) From 66b0d9ee5d440518a990d604025f916ac710d701 Mon Sep 17 00:00:00 2001 From: Donggeun Yu Date: Mon, 20 May 2024 17:51:04 +0900 Subject: [PATCH 05/80] DeformableDETR two stage support bfloat16 (#30907) Update modeling_deformable_detr.py --- .../models/deformable_detr/modeling_deformable_detr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/deformable_detr/modeling_deformable_detr.py b/src/transformers/models/deformable_detr/modeling_deformable_detr.py index bab552da35..b77ae4f8f8 100755 --- a/src/transformers/models/deformable_detr/modeling_deformable_detr.py +++ b/src/transformers/models/deformable_detr/modeling_deformable_detr.py @@ -1616,8 +1616,8 @@ class DeformableDetrModel(DeformableDetrPreTrainedModel): valid_width = torch.sum(~mask_flatten_[:, 0, :, 0], 1) grid_y, grid_x = meshgrid( - torch.linspace(0, height - 1, height, dtype=torch.float32, device=enc_output.device), - torch.linspace(0, width - 1, width, dtype=torch.float32, device=enc_output.device), + torch.linspace(0, height - 1, height, dtype=enc_output.dtype, device=enc_output.device), + torch.linspace(0, width - 1, width, dtype=enc_output.dtype, device=enc_output.device), indexing="ij", ) grid = torch.cat([grid_x.unsqueeze(-1), grid_y.unsqueeze(-1)], -1) From 1c2bb3ac54d18a0bfc3e212b73f8d1c4aac3ea48 Mon Sep 17 00:00:00 2001 From: Kamil Akesbi <45195979+kamilakesbi@users.noreply.github.com> Date: Mon, 20 May 2024 10:53:58 +0200 Subject: [PATCH 06/80] add return_token_timestamps to WhisperProcessor (#30812) * compute num_frames in WhisperFeatureExtractor * add return_num_frames in WhisperFeatureProcessor + adapt pipeline * return_timestamps 
renaming + pipeline fix * fix * fix * fix * add tests * Update src/transformers/models/whisper/feature_extraction_whisper.py Co-authored-by: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com> * apply review changes * fix * Update src/transformers/models/whisper/feature_extraction_whisper.py Co-authored-by: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com> * Update tests/models/whisper/test_modeling_whisper.py Co-authored-by: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com> * apply review * fix * review changes * Update src/transformers/models/whisper/feature_extraction_whisper.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * make style quality * EXPECTED_OUTPUT in single line * small numpy->torch fix * fix --------- Co-authored-by: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com> Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --- .../whisper/feature_extraction_whisper.py | 8 ++ .../models/whisper/generation_whisper.py | 15 ++- .../pipelines/automatic_speech_recognition.py | 28 +++--- tests/models/whisper/test_modeling_whisper.py | 93 +++++++++++++++++++ 4 files changed, 127 insertions(+), 17 deletions(-) diff --git a/src/transformers/models/whisper/feature_extraction_whisper.py b/src/transformers/models/whisper/feature_extraction_whisper.py index 508e85b91f..f2d6da5660 100644 --- a/src/transformers/models/whisper/feature_extraction_whisper.py +++ b/src/transformers/models/whisper/feature_extraction_whisper.py @@ -188,6 +188,7 @@ class WhisperFeatureExtractor(SequenceFeatureExtractor): sampling_rate: Optional[int] = None, do_normalize: Optional[bool] = None, device: Optional[str] = "cpu", + return_token_timestamps: Optional[bool] = None, **kwargs, ) -> BatchFeature: """ @@ -237,6 +238,9 @@ class WhisperFeatureExtractor(SequenceFeatureExtractor): device (`str`, *optional*, defaults to `'cpu'`): Specifies the device for computation of the log-mel spectrogram of audio signals in the `_torch_extract_fbank_features` method. (e.g., "cpu", "cuda") + return_token_timestamps (`bool`, *optional*, defaults to `None`): + Whether or not to return the number of frames of the input raw_speech. + These num_frames can be used by the model to compute word level timestamps. """ if sampling_rate is not None: @@ -302,6 +306,7 @@ class WhisperFeatureExtractor(SequenceFeatureExtractor): if isinstance(input_features[0], List): padded_inputs["input_features"] = [np.asarray(feature, dtype=np.float32) for feature in input_features] + else: padded_inputs["input_features"] = input_features @@ -309,6 +314,9 @@ class WhisperFeatureExtractor(SequenceFeatureExtractor): # rescale from sample (48000) to feature (3000) padded_inputs["attention_mask"] = padded_inputs["attention_mask"][:, :: self.hop_length] + if return_token_timestamps is not None: + padded_inputs["num_frames"] = [len(raw_speech_i) // self.hop_length for raw_speech_i in raw_speech] + if return_tensors is not None: padded_inputs = padded_inputs.convert_to_tensors(return_tensors) diff --git a/src/transformers/models/whisper/generation_whisper.py b/src/transformers/models/whisper/generation_whisper.py index c58b0d35e5..2bdff6e534 100644 --- a/src/transformers/models/whisper/generation_whisper.py +++ b/src/transformers/models/whisper/generation_whisper.py @@ -209,11 +209,15 @@ class WhisperGenerationMixin: # 2. 
num_frames is different, compute the DTW matrix for each sample sequentially # we're using np.unique because num_frames can be int/list/tuple - if len(np.unique(num_frames)) == 1: - # if num_frames is the same, no need to recompute matrix, std and mean for each element of the batch - num_frames = num_frames if isinstance(num_frames, int) else num_frames[0] - + if isinstance(num_frames, int): weights = weights[..., : num_frames // 2] + + elif isinstance(num_frames, (list, tuple, np.ndarray)) and len(np.unique(num_frames)) == 1: + weights = weights[..., : num_frames[0] // 2] + + elif isinstance(num_frames, (torch.Tensor)) and len(torch.unique(num_frames)) == 1: + weights = weights[..., : num_frames[0] // 2] + else: # num_frames is of shape (batch_size,) whereas batch_size is truely batch_size*num_return_sequences repeat_time = batch_size if isinstance(num_frames, int) else batch_size // len(num_frames) @@ -231,7 +235,7 @@ class WhisperGenerationMixin: # Perform dynamic time warping on each element of the batch. for batch_idx in range(batch_size): - if num_frames is not None and isinstance(num_frames, (tuple, list, np.ndarray)): + if num_frames is not None and isinstance(num_frames, (tuple, list, np.ndarray, torch.Tensor)): matrix = weights[batch_idx, ..., : num_frames[batch_idx] // 2] # Normalize and smoothen the weights. @@ -475,6 +479,7 @@ class WhisperGenerationMixin: "The input name `inputs` is deprecated. Please make sure to use `input_features` instead.", FutureWarning, ) + # 1. prepare generation config generation_config, kwargs = self._prepare_generation_config(generation_config, **kwargs) diff --git a/src/transformers/pipelines/automatic_speech_recognition.py b/src/transformers/pipelines/automatic_speech_recognition.py index de1a9b57ac..123dbcdb67 100644 --- a/src/transformers/pipelines/automatic_speech_recognition.py +++ b/src/transformers/pipelines/automatic_speech_recognition.py @@ -443,11 +443,18 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline): return_tensors="pt", ) else: - processed = self.feature_extractor( - inputs, sampling_rate=self.feature_extractor.sampling_rate, return_tensors="pt" - ) - if stride is None: - extra["segment_size"] = len(inputs) + if self.type == "seq2seq_whisper" and stride is None: + processed = self.feature_extractor( + inputs, + sampling_rate=self.feature_extractor.sampling_rate, + return_tensors="pt", + return_token_timestamps=True, + ) + extra["num_frames"] = processed.pop("num_frames") + else: + processed = self.feature_extractor( + inputs, sampling_rate=self.feature_extractor.sampling_rate, return_tensors="pt" + ) if self.torch_dtype is not None: processed = processed.to(dtype=self.torch_dtype) @@ -461,11 +468,11 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline): def _forward(self, model_inputs, return_timestamps=False, **generate_kwargs): attention_mask = model_inputs.pop("attention_mask", None) stride = model_inputs.pop("stride", None) - segment_size = model_inputs.pop("segment_size", None) + num_frames = model_inputs.pop("num_frames", None) is_last = model_inputs.pop("is_last") - if stride is not None and segment_size is not None: - raise ValueError("segment_size must be used only when stride is None") + if stride is not None and num_frames is not None: + raise ValueError("num_frames must be used only when stride is None") if self.type in {"seq2seq", "seq2seq_whisper"}: encoder = self.model.get_encoder() @@ -495,10 +502,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline): generate_kwargs["num_frames"] = [s[0] // 
self.feature_extractor.hop_length for s in stride] else: - if isinstance(segment_size, int): - generate_kwargs["num_frames"] = segment_size // self.feature_extractor.hop_length - else: - generate_kwargs["num_frames"] = segment_size[0] // self.feature_extractor.hop_length + generate_kwargs["num_frames"] = num_frames if self.type == "seq2seq_whisper" and inputs.shape[-1] > self.feature_extractor.nb_max_frames: generate_kwargs["input_features"] = inputs diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py index fed1b9c059..58acb5f2fd 100644 --- a/tests/models/whisper/test_modeling_whisper.py +++ b/tests/models/whisper/test_modeling_whisper.py @@ -1950,6 +1950,69 @@ class WhisperModelIntegrationTests(unittest.TestCase): transcript = processor.batch_decode(generated_ids, skip_special_tokens=True, output_offsets=True) self.assertEqual(transcript, EXPECTED_TRANSCRIPT) + @slow + def test_large_timestamp_generation(self): + set_seed(0) + processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3") + model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3") + model.to(torch_device) + + input_speech = np.concatenate(self._load_datasamples(4)) + input_features = processor( + input_speech, return_tensors="pt", sampling_rate=16_000, return_token_timestamps=True + ).input_features + input_features = input_features.to(torch_device) + + generated_ids = model.generate(input_features, max_length=448, return_timestamps=True).to("cpu") + + # fmt: off + EXPECTED_OUTPUT = torch.tensor([50258, 50259, 50360, 50365, 2221, 13, 2326, 388, 391, 307, 264, 50244, 295, 264, 2808, 5359, 11, 293, 321, 366, 5404, 281, 2928, 702, 14943, 13, 50629, 50682, 6966, 307, 2221, 13, 2326, 388, 391, 311, 9060, 1570, 1880, 813, 702, 1871, 13, 50870, 50911, 634, 5112, 505, 300, 412, 341, 42729, 3196, 295, 264, 1064, 11, 365, 5272, 293, 12904, 9256, 450, 10539, 949, 505, 11, 51245, 51287, 1034, 4680, 10117, 490, 3936, 293, 1080, 3542, 5160, 881, 26336, 281, 264, 1575, 13, 51494, 51523, 634, 575, 12525, 22618, 1968, 6144, 35617, 1456, 397, 266, 311, 589, 307, 534, 10281, 934, 439, 11, 51799, 51815, 50257]) + # fmt: on + self.assertTrue(torch.allclose(generated_ids, EXPECTED_OUTPUT)) + + EXPECTED_TRANSCRIPT = [ + { + "text": ( + " Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel." + " Nor is Mr. Quilter's manner less interesting than his matter. He tells us that at this festive" + " season of the year, with Christmas and roast beef looming before us, similes drawn from eating" + " and its results occur most readily to the mind. He has grave doubts whether Sir Frederick " + "Leighton's work is really Greek after all," + ), + "offsets": [ + { + "text": ( + " Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel." + ), + "timestamp": (0.0, 5.28), + }, + { + "text": " Nor is Mr. 
Quilter's manner less interesting than his matter.", + "timestamp": (6.34, 10.1), + }, + { + "text": ( + " He tells us that at this festive season of the year, with Christmas and roast beef looming before us," + ), + "timestamp": (10.92, 17.6), + }, + { + "text": (" similes drawn from eating and its results occur most readily to the mind."), + "timestamp": (18.44, 22.580000000000002), + }, + { + "text": ( + " He has grave doubts whether Sir Frederick Leighton's work is really Greek after all," + ), + "timestamp": (23.16, 28.68), + }, + ], + } + ] + + transcript = processor.batch_decode(generated_ids, skip_special_tokens=True, output_offsets=True) + self.assertEqual(transcript, EXPECTED_TRANSCRIPT) + @slow def test_tiny_token_timestamp_generation(self): set_seed(0) @@ -1979,6 +2042,36 @@ class WhisperModelIntegrationTests(unittest.TestCase): self.assertTrue(torch.allclose(generate_outputs.token_timestamps.to("cpu"), EXPECTED_OUTPUT)) + @slow + def test_large_token_timestamp_generation(self): + set_seed(0) + processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3") + model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3") + model.to(torch_device) + + input_speech = self._load_datasamples(4) + input_features = processor( + input_speech, return_tensors="pt", sampling_rate=16_000, return_token_timestamps=True + ) + input_features = input_features.to(torch_device) + + generate_outputs = model.generate( + **input_features, max_length=448, return_timestamps=True, return_token_timestamps=True + ) + + self.assertEqual(generate_outputs.sequences.shape, generate_outputs.token_timestamps.shape) + + # fmt: off + EXPECTED_OUTPUT = torch.tensor([ + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.6200, 0.7400, 0.8600, 1.0000, 1.0400, 1.3000, 1.4400, 1.7800, 2.1800, 2.2800, 2.5000, 2.9200, 3.0000, 3.3800, 3.5000, 3.6000, 3.8400, 4.1000, 4.4000, 4.6800, 5.1400, 5.3600, 5.8200, 5.8200, 5.8200, 5.8200, 5.8200, 5.8200, 5.8200, 5.8200, 5.8200, 5.8200, 5.8200, 5.8200, 5.8200, 5.8200], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.6000, 0.9200, 1.2200, 1.3400, 1.4200, 1.5400, 1.5800, 1.7400, 2.0600, 2.3800, 3.0400, 3.3800, 3.6400, 4.1200, 4.3600, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800, 4.7800], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.5400, 0.8200, 1.1600, 1.4600, 1.7400, 1.8800, 2.3400, 2.7400, 3.1400, 3.2200, 3.5400, 4.2800, 4.5600, 4.8200, 5.0600, 5.3200, 5.6600, 5.9600, 6.1400, 6.4000, 6.8400, 7.8800, 8.0200, 8.3600, 8.7000, 9.0200, 9.3200, 9.5000, 9.8400, 10.3000, 10.6600, 11.0800, 11.3600, 11.4600, 11.8000, 12.4600], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.5600, 0.7600, 1.0600, 1.4000, 1.8800, 2.2600, 2.6200, 2.8000, 2.9600, 3.0000, 3.2000, 3.4400, 3.6800, 4.0000, 4.6000, 5.0000, 5.3200, 5.4800, 6.0600, 6.0600, 6.1000, 6.3200, 6.7400, 7.0000, 7.2200, 7.4000, 7.7600, 8.0600, 8.5600, 8.8600, 8.9400, 9.1000, 9.3400, 9.8800, 9.8800, 9.8800] + ]) + # fmt: on + + self.assertTrue(torch.allclose(generate_outputs.token_timestamps.to("cpu"), EXPECTED_OUTPUT)) + @slow def test_tiny_token_timestamp_batch_generation(self): set_seed(0) From 1834916481235948f413d3b965f96ccd3b2e44ae Mon Sep 17 00:00:00 2001 From: Goncalo Paulo <30472805+SrGonao@users.noreply.github.com> Date: Mon, 20 May 2024 11:18:09 +0200 Subject: [PATCH 07/80] Fix num_hidden_layers in initialization of new model in Mamba (#30403) Fix num_hidden_layers in initialization Originally, the 
initialization was using config.num_layers instead of config.num_hidden_layers. This fixes that. --- src/transformers/models/mamba/modeling_mamba.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/mamba/modeling_mamba.py b/src/transformers/models/mamba/modeling_mamba.py index 4834fd6247..bb67b62008 100644 --- a/src/transformers/models/mamba/modeling_mamba.py +++ b/src/transformers/models/mamba/modeling_mamba.py @@ -399,7 +399,7 @@ class MambaPreTrainedModel(PreTrainedModel): # Having just p *= scale would repeatedly scale it down nn.init.kaiming_uniform_(p, a=math.sqrt(5)) with torch.no_grad(): - p /= math.sqrt(self.config.num_layers) + p /= math.sqrt(self.config.num_hidden_layers) @dataclass From 7b4b4564386dc4e18bf38fc288d87700824d4191 Mon Sep 17 00:00:00 2001 From: Eric2i Date: Mon, 20 May 2024 02:18:17 -0700 Subject: [PATCH 08/80] separate kwargs in processor (similar to #30193) (#30905) * Fix similar bug in processor (related to #30193) * Reformat processing_git.py to comply with ruff formatting --- src/transformers/models/git/processing_git.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/git/processing_git.py b/src/transformers/models/git/processing_git.py index 79f26f3bf2..98649c644e 100644 --- a/src/transformers/models/git/processing_git.py +++ b/src/transformers/models/git/processing_git.py @@ -76,15 +76,21 @@ class GitProcessor(ProcessorMixin): `None`). - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`. """ + tokenizer_kwargs, image_processor_kwargs = {}, {} + if kwargs: + tokenizer_kwargs = {k: v for k, v in kwargs.items() if k not in self.image_processor._valid_processor_keys} + image_processor_kwargs = { + k: v for k, v in kwargs.items() if k in self.image_processor._valid_processor_keys + } if text is None and images is None: raise ValueError("You have to specify either text or images. 
Both cannot be none.") if text is not None: - encoding = self.tokenizer(text, return_tensors=return_tensors, **kwargs) + encoding = self.tokenizer(text, return_tensors=return_tensors, **tokenizer_kwargs) if images is not None: - image_features = self.image_processor(images, return_tensors=return_tensors, **kwargs) + image_features = self.image_processor(images, return_tensors=return_tensors, **image_processor_kwargs) if text is not None and images is not None: encoding["pixel_values"] = image_features.pixel_values From c11ac7857bf93914f69a209d50a918ad7fe1d9a3 Mon Sep 17 00:00:00 2001 From: Hafedh <70411813+not-lain@users.noreply.github.com> Date: Mon, 20 May 2024 10:38:32 +0100 Subject: [PATCH 09/80] fix for custom pipeline configuration (#29004) * fix for custom pipeline configuration * fix for custom pipelines * remove extra exception * added test for custom pipelines extra tag * format with ruff * limit extra tag for first time only * format with ruff * improve tests for custom pipelines --- src/transformers/configuration_utils.py | 5 +++++ src/transformers/feature_extraction_utils.py | 14 ++++++++++---- src/transformers/image_processing_utils.py | 15 ++++++++++----- src/transformers/processing_utils.py | 14 ++++++++++---- src/transformers/tokenization_utils_base.py | 20 +++++++++++++------- src/transformers/utils/__init__.py | 1 + src/transformers/utils/generic.py | 13 +++++++++++++ tests/pipelines/test_pipelines_common.py | 18 ++++++++++++++++++ 8 files changed, 80 insertions(+), 20 deletions(-) diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index fc7f782e34..86b946d162 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -32,6 +32,7 @@ from .utils import ( CONFIG_NAME, PushToHubMixin, add_model_info_to_auto_map, + add_model_info_to_custom_pipelines, cached_file, copy_func, download_url, @@ -736,6 +737,10 @@ class PretrainedConfig(PushToHubMixin): config_dict["auto_map"] = add_model_info_to_auto_map( config_dict["auto_map"], pretrained_model_name_or_path ) + if "custom_pipelines" in config_dict and not is_local: + config_dict["custom_pipelines"] = add_model_info_to_custom_pipelines( + config_dict["custom_pipelines"], pretrained_model_name_or_path + ) return config_dict, kwargs @classmethod diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py index 76070ebeb8..44e01f8d85 100644 --- a/src/transformers/feature_extraction_utils.py +++ b/src/transformers/feature_extraction_utils.py @@ -31,6 +31,7 @@ from .utils import ( PushToHubMixin, TensorType, add_model_info_to_auto_map, + add_model_info_to_custom_pipelines, cached_file, copy_func, download_url, @@ -539,10 +540,15 @@ class FeatureExtractionMixin(PushToHubMixin): f"loading configuration file {feature_extractor_file} from cache at {resolved_feature_extractor_file}" ) - if "auto_map" in feature_extractor_dict and not is_local: - feature_extractor_dict["auto_map"] = add_model_info_to_auto_map( - feature_extractor_dict["auto_map"], pretrained_model_name_or_path - ) + if not is_local: + if "auto_map" in feature_extractor_dict: + feature_extractor_dict["auto_map"] = add_model_info_to_auto_map( + feature_extractor_dict["auto_map"], pretrained_model_name_or_path + ) + if "custom_pipelines" in feature_extractor_dict: + feature_extractor_dict["custom_pipelines"] = add_model_info_to_custom_pipelines( + feature_extractor_dict["custom_pipelines"], pretrained_model_name_or_path + ) return 
feature_extractor_dict, kwargs diff --git a/src/transformers/image_processing_utils.py b/src/transformers/image_processing_utils.py index efd1e04a62..c42378d8f3 100644 --- a/src/transformers/image_processing_utils.py +++ b/src/transformers/image_processing_utils.py @@ -31,6 +31,7 @@ from .utils import ( IMAGE_PROCESSOR_NAME, PushToHubMixin, add_model_info_to_auto_map, + add_model_info_to_custom_pipelines, cached_file, copy_func, download_url, @@ -375,11 +376,15 @@ class ImageProcessingMixin(PushToHubMixin): f"loading configuration file {image_processor_file} from cache at {resolved_image_processor_file}" ) - if "auto_map" in image_processor_dict and not is_local: - image_processor_dict["auto_map"] = add_model_info_to_auto_map( - image_processor_dict["auto_map"], pretrained_model_name_or_path - ) - + if not is_local: + if "auto_map" in image_processor_dict: + image_processor_dict["auto_map"] = add_model_info_to_auto_map( + image_processor_dict["auto_map"], pretrained_model_name_or_path + ) + if "custom_pipelines" in image_processor_dict: + image_processor_dict["custom_pipelines"] = add_model_info_to_custom_pipelines( + image_processor_dict["custom_pipelines"], pretrained_model_name_or_path + ) return image_processor_dict, kwargs @classmethod diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 463591032c..a8e47fb683 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -30,6 +30,7 @@ from .utils import ( PROCESSOR_NAME, PushToHubMixin, add_model_info_to_auto_map, + add_model_info_to_custom_pipelines, cached_file, copy_func, direct_transformers_import, @@ -355,10 +356,15 @@ class ProcessorMixin(PushToHubMixin): else: logger.info(f"loading configuration file {processor_file} from cache at {resolved_processor_file}") - if "auto_map" in processor_dict and not is_local: - processor_dict["auto_map"] = add_model_info_to_auto_map( - processor_dict["auto_map"], pretrained_model_name_or_path - ) + if not is_local: + if "auto_map" in processor_dict: + processor_dict["auto_map"] = add_model_info_to_auto_map( + processor_dict["auto_map"], pretrained_model_name_or_path + ) + if "custom_pipelines" in processor_dict: + processor_dict["custom_pipelines"] = add_model_info_to_custom_pipelines( + processor_dict["custom_pipelines"], pretrained_model_name_or_path + ) return processor_dict, kwargs diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 395f9859cd..4cb75c9864 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -42,6 +42,7 @@ from .utils import ( TensorType, add_end_docstrings, add_model_info_to_auto_map, + add_model_info_to_custom_pipelines, cached_file, copy_func, download_url, @@ -2177,13 +2178,18 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): config_tokenizer_class = None init_kwargs = init_configuration - if "auto_map" in init_kwargs and not _is_local: - # For backward compatibility with odl format. - if isinstance(init_kwargs["auto_map"], (tuple, list)): - init_kwargs["auto_map"] = {"AutoTokenizer": init_kwargs["auto_map"]} - init_kwargs["auto_map"] = add_model_info_to_auto_map( - init_kwargs["auto_map"], pretrained_model_name_or_path - ) + if not _is_local: + if "auto_map" in init_kwargs: + # For backward compatibility with odl format. 
+ if isinstance(init_kwargs["auto_map"], (tuple, list)): + init_kwargs["auto_map"] = {"AutoTokenizer": init_kwargs["auto_map"]} + init_kwargs["auto_map"] = add_model_info_to_auto_map( + init_kwargs["auto_map"], pretrained_model_name_or_path + ) + if "custom_pipelines" in init_kwargs: + init_kwargs["custom_pipelines"] = add_model_info_to_custom_pipelines( + init_kwargs["custom_pipelines"], pretrained_model_name_or_path + ) if config_tokenizer_class is None: # Matt: This entire block is only used to decide if the tokenizer class matches the class in the repo. diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index 8c91463322..82d0b5001c 100755 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -37,6 +37,7 @@ from .generic import ( PaddingStrategy, TensorType, add_model_info_to_auto_map, + add_model_info_to_custom_pipelines, cached_property, can_return_loss, expand_dims, diff --git a/src/transformers/utils/generic.py b/src/transformers/utils/generic.py index a8277588ff..1f332434a9 100644 --- a/src/transformers/utils/generic.py +++ b/src/transformers/utils/generic.py @@ -721,6 +721,19 @@ def add_model_info_to_auto_map(auto_map, repo_id): return auto_map +def add_model_info_to_custom_pipelines(custom_pipeline, repo_id): + """ + Adds the information of the repo_id to a given custom pipeline. + """ + # {custom_pipelines : {task: {"impl": "path.to.task"},...} } + for task in custom_pipeline.keys(): + if "impl" in custom_pipeline[task]: + module = custom_pipeline[task]["impl"] + if "--" not in module: + custom_pipeline[task]["impl"] = f"{repo_id}--{module}" + return custom_pipeline + + def infer_framework(model_class): """ Infers the framework of a given model without using isinstance(), because we cannot guarantee that the relevant diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 763c7d1a88..6ede7d1c7a 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -925,6 +925,24 @@ class DynamicPipelineTester(unittest.TestCase): # Can't make an isinstance check because the new_classifier is from the PairClassificationPipeline class of a # dynamic module self.assertEqual(new_classifier.__class__.__name__, "PairClassificationPipeline") + # check for tag exitence, tag needs to be added when we are calling a custom pipeline from the hub + # useful for cases such as finetuning + self.assertDictEqual( + new_classifier.model.config.custom_pipelines, + { + "pair-classification": { + "impl": f"{USER}/test-dynamic-pipeline--custom_pipeline.PairClassificationPipeline", + "pt": ("AutoModelForSequenceClassification",), + "tf": (), + } + }, + ) + # test if the pipeline still works after the model is finetuned + # (we are actually testing if the pipeline still works from the final repo) + # this is where the user/repo--module.class is used for + new_classifier.model.push_to_hub(repo_name=f"{USER}/test-pipeline-for-a-finetuned-model", token=self._token) + del new_classifier # free up memory + new_classifier = pipeline(model=f"{USER}/test-pipeline-for-a-finetuned-model", trust_remote_code=True) results = classifier("I hate you", second_text="I love you") new_results = new_classifier("I hate you", second_text="I love you") From e6708709cb7ee2cc04df641403ed0671ee7806c6 Mon Sep 17 00:00:00 2001 From: Yoach Lacombe <52246514+ylacombe@users.noreply.github.com> Date: Mon, 20 May 2024 13:40:42 +0200 Subject: [PATCH 10/80] Add AutoFeatureExtractor support to 
Wav2Vec2ProcessorWithLM (#28706) * Add AutoFeatureExtractor support to Wav2Vec2ProcessorWithLM * update with a type filter * add raises error test * fix added test --- .../processing_wav2vec2_with_lm.py | 21 ++++++++----- .../test_processor_wav2vec2_with_lm.py | 31 ++++++++++++++++++- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py index b388be245f..9fceb1e61a 100644 --- a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +++ b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py @@ -70,15 +70,15 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin): with language model support into a single processor for language model boosted speech recognition decoding. Args: - feature_extractor ([`Wav2Vec2FeatureExtractor`]): - An instance of [`Wav2Vec2FeatureExtractor`]. The feature extractor is a required input. + feature_extractor ([`Wav2Vec2FeatureExtractor`] or [`SeamlessM4TFeatureExtractor`]): + An instance of [`Wav2Vec2FeatureExtractor`] or [`SeamlessM4TFeatureExtractor`]. The feature extractor is a required input. tokenizer ([`Wav2Vec2CTCTokenizer`]): An instance of [`Wav2Vec2CTCTokenizer`]. The tokenizer is a required input. decoder (`pyctcdecode.BeamSearchDecoderCTC`): An instance of [`pyctcdecode.BeamSearchDecoderCTC`]. The decoder is a required input. """ - feature_extractor_class = "Wav2Vec2FeatureExtractor" + feature_extractor_class = "AutoFeatureExtractor" tokenizer_class = "Wav2Vec2CTCTokenizer" def __init__( @@ -93,6 +93,11 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin): if not isinstance(decoder, BeamSearchDecoderCTC): raise ValueError(f"`decoder` has to be of type {BeamSearchDecoderCTC.__class__}, but is {type(decoder)}") + if feature_extractor.__class__.__name__ not in ["Wav2Vec2FeatureExtractor", "SeamlessM4TFeatureExtractor"]: + raise ValueError( + f"`feature_extractor` has to be of type `Wav2Vec2FeatureExtractor` or `SeamlessM4TFeatureExtractor`, but is {type(feature_extractor)}" + ) + # make sure that decoder's alphabet and tokenizer's vocab match in content missing_decoder_tokens = self.get_missing_alphabet_tokens(decoder, tokenizer) if len(missing_decoder_tokens) > 0: @@ -117,7 +122,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin): - This class method is simply calling Wav2Vec2FeatureExtractor's + This class method is simply calling the feature extractor's [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`], Wav2Vec2CTCTokenizer's [`~tokenization_utils_base.PreTrainedTokenizerBase.from_pretrained`], and [`pyctcdecode.BeamSearchDecoderCTC.load_from_hf_hub`]. @@ -213,8 +218,8 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin): def __call__(self, *args, **kwargs): """ - When used in normal mode, this method forwards all its arguments to Wav2Vec2FeatureExtractor's - [`~Wav2Vec2FeatureExtractor.__call__`] and returns its output. If used in the context + When used in normal mode, this method forwards all its arguments to the feature extractor's + [`~FeatureExtractionMixin.__call__`] and returns its output. If used in the context [`~Wav2Vec2ProcessorWithLM.as_target_processor`] this method forwards all its arguments to Wav2Vec2CTCTokenizer's [`~Wav2Vec2CTCTokenizer.__call__`]. Please refer to the docstring of the above two methods for more information. 
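To make the effect of the looser `feature_extractor_class` concrete, an illustrative sketch (not part of the patch) follows. The checkpoint names and the decoder construction are assumptions chosen for demonstration only; any feature extractor resolved through `AutoFeatureExtractor` other than `Wav2Vec2FeatureExtractor` or `SeamlessM4TFeatureExtractor` would now raise a `ValueError` at construction time:

```python
from pyctcdecode import build_ctcdecoder

from transformers import AutoFeatureExtractor, Wav2Vec2CTCTokenizer, Wav2Vec2ProcessorWithLM

# resolves to SeamlessM4TFeatureExtractor, which the processor now accepts
feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/w2v-bert-2.0")
tokenizer = Wav2Vec2CTCTokenizer.from_pretrained("facebook/wav2vec2-base-960h")

# build a beam-search decoder whose labels follow the tokenizer vocabulary order
sorted_vocab = sorted(tokenizer.get_vocab().items(), key=lambda item: item[1])
decoder = build_ctcdecoder(labels=[token for token, _ in sorted_vocab])

processor = Wav2Vec2ProcessorWithLM(
    feature_extractor=feature_extractor, tokenizer=tokenizer, decoder=decoder
)
```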
@@ -252,8 +257,8 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin): def pad(self, *args, **kwargs): """ - When used in normal mode, this method forwards all its arguments to Wav2Vec2FeatureExtractor's - [`~Wav2Vec2FeatureExtractor.pad`] and returns its output. If used in the context + When used in normal mode, this method forwards all its arguments to the feature extractor's + [`~FeatureExtractionMixin.pad`] and returns its output. If used in the context [`~Wav2Vec2ProcessorWithLM.as_target_processor`] this method forwards all its arguments to Wav2Vec2CTCTokenizer's [`~Wav2Vec2CTCTokenizer.pad`]. Please refer to the docstring of the above two methods for more information. diff --git a/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py b/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py index 2c52a92165..61dee30091 100644 --- a/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py +++ b/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py @@ -25,7 +25,7 @@ import numpy as np from datasets import load_dataset from parameterized import parameterized -from transformers import AutoProcessor +from transformers import AutoFeatureExtractor, AutoProcessor from transformers.models.wav2vec2 import Wav2Vec2CTCTokenizer, Wav2Vec2FeatureExtractor from transformers.models.wav2vec2.tokenization_wav2vec2 import VOCAB_FILES_NAMES from transformers.testing_utils import require_pyctcdecode, require_torch, require_torchaudio, slow @@ -157,6 +157,35 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase): for key in input_feat_extract.keys(): self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) + def test_another_feature_extractor(self): + feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/w2v-bert-2.0") + tokenizer = self.get_tokenizer() + decoder = self.get_decoder() + + processor = Wav2Vec2ProcessorWithLM(tokenizer=tokenizer, feature_extractor=feature_extractor, decoder=decoder) + + raw_speech = floats_list((3, 1000)) + + input_feat_extract = feature_extractor(raw_speech, return_tensors="np") + input_processor = processor(raw_speech, return_tensors="np") + + for key in input_feat_extract.keys(): + self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) + + self.assertListEqual( + processor.model_input_names, + feature_extractor.model_input_names, + msg="`processor` and `feature_extractor` model input names do not match", + ) + + def test_wrong_feature_extractor_raises_error(self): + feature_extractor = AutoFeatureExtractor.from_pretrained("openai/whisper-large-v3") + tokenizer = self.get_tokenizer() + decoder = self.get_decoder() + + with self.assertRaises(ValueError): + Wav2Vec2ProcessorWithLM(tokenizer=tokenizer, feature_extractor=feature_extractor, decoder=decoder) + def test_tokenizer(self): feature_extractor = self.get_feature_extractor() tokenizer = self.get_tokenizer() From 76e05301c3eb14c08a8c9e0dc3aadbc5a155f0e0 Mon Sep 17 00:00:00 2001 From: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com> Date: Mon, 20 May 2024 13:55:57 +0200 Subject: [PATCH 11/80] Fix a shape annotation and typos in `mamba` slow forward (#30691) * fix typos and one shape comment * fix `intermediade` typo in jamba --- src/transformers/models/jamba/modeling_jamba.py | 10 +++++----- src/transformers/models/mamba/modeling_mamba.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/transformers/models/jamba/modeling_jamba.py 
b/src/transformers/models/jamba/modeling_jamba.py index cfa80d2ce3..5a582acdd2 100755 --- a/src/transformers/models/jamba/modeling_jamba.py +++ b/src/transformers/models/jamba/modeling_jamba.py @@ -962,15 +962,15 @@ class JambaMambaMixer(nn.Module): # 3.b. Discretization: B and C to [batch, seq_len, intermediate_size, ssm_state_size] (SRAM) A = -torch.exp(self.A_log.float()) # [intermediate_size, ssm_state_size] discrete_A = torch.exp(A[None, :, None, :] * discrete_time_step[:, :, :, None]) # [batch, intermediate_size, seq_len, ssm_state_size] - discrete_B = discrete_time_step[:, :, :, None] * B[:, None, :, :].float() # [batch, intermediade_size, seq_len, ssm_state_size] + discrete_B = discrete_time_step[:, :, :, None] * B[:, None, :, :].float() # [batch, intermediate_size, seq_len, ssm_state_size] deltaB_u = discrete_B * hidden_states[:, :, :, None].float() # 3.c perform the recurrence y ← SSM(A, B, C)(x) scan_outputs = [] for i in range(seq_len): - ssm_state = discrete_A[:, :, i, :] * ssm_state + deltaB_u[:, :, i, :] # [batch, intermediade_size, ssm_state] - scan_output = torch.matmul(ssm_state.to(dtype), C[:, i, :].unsqueeze(-1)) # [batch, intermediade_size, 1] + ssm_state = discrete_A[:, :, i, :] * ssm_state + deltaB_u[:, :, i, :] # [batch, intermediate_size, ssm_state] + scan_output = torch.matmul(ssm_state.to(dtype), C[:, i, :].unsqueeze(-1)) # [batch, intermediate_size, 1] scan_outputs.append(scan_output[:, :, 0]) - scan_output = torch.stack(scan_outputs, dim=-1) # [batch, intermediade_size, seq_len] + scan_output = torch.stack(scan_outputs, dim=-1) # [batch, intermediate_size, seq_len] scan_output = scan_output + (hidden_states * self.D[None, :, None]) scan_output = (scan_output * self.act(gate)) @@ -978,7 +978,7 @@ class JambaMambaMixer(nn.Module): cache_params.ssm_states[self.layer_idx] = ssm_state # 4. Final linear projection - contextualized_states = self.out_proj(scan_output.transpose(1, 2)) # [batch, seq_len, hidden_size] + contextualized_states = self.out_proj(scan_output.transpose(1, 2)) # [batch, seq_len, hidden_size] return contextualized_states # fmt: on diff --git a/src/transformers/models/mamba/modeling_mamba.py b/src/transformers/models/mamba/modeling_mamba.py index bb67b62008..a8309e0430 100644 --- a/src/transformers/models/mamba/modeling_mamba.py +++ b/src/transformers/models/mamba/modeling_mamba.py @@ -279,16 +279,16 @@ class MambaMixer(nn.Module): # 3.b. 
Discretization: B and C to [batch, seq_len, intermediate_size, ssm_state_size] (SRAM) A = -torch.exp(self.A_log.float()) # [intermediate_size, ssm_state_size] discrete_A = torch.exp(A[None, :, None, :] * discrete_time_step[:, :, :, None]) # [batch, intermediate_size, seq_len, ssm_state_size] - discrete_B = discrete_time_step[:, :, :, None] * B[:, None, :, :].float() # [batch, intermediade_size, seq_len, ssm_state_size] + discrete_B = discrete_time_step[:, :, :, None] * B[:, None, :, :].float() # [batch, intermediate_size, seq_len, ssm_state_size] deltaB_u = discrete_B * hidden_states[:, :, :, None].float() # 3.c perform the recurrence y ← SSM(A, B, C)(x) scan_outputs = [] for i in range(seq_len): - ssm_state = discrete_A[:, :, i, :] * ssm_state + deltaB_u[:, :, i, :] # [batch, intermediade_size, ssm_state] - scan_output = torch.matmul(ssm_state.to(dtype), C[:, i, :].unsqueeze(-1)) # [batch, intermediade_size, 1] + ssm_state = discrete_A[:, :, i, :] * ssm_state + deltaB_u[:, :, i, :] # [batch, intermediate_size, ssm_state] + scan_output = torch.matmul(ssm_state.to(dtype), C[:, i, :].unsqueeze(-1)) # [batch, intermediate_size, 1] scan_outputs.append(scan_output[:, :, 0]) - scan_output = torch.stack(scan_outputs, dim=-1) # [batch, seq_len, intermediade_size] + scan_output = torch.stack(scan_outputs, dim=-1) # [batch, intermediate_size, seq_len] scan_output = scan_output + (hidden_states * self.D[None, :, None]) scan_output = (scan_output * self.act(gate)) @@ -296,7 +296,7 @@ class MambaMixer(nn.Module): cache_params.ssm_states[self.layer_idx].copy_(ssm_state) # 4. Final linear projection - contextualized_states = self.out_proj(scan_output.transpose(1, 2)) # [batch, seq_len, hidden_size] + contextualized_states = self.out_proj(scan_output.transpose(1, 2)) # [batch, seq_len, hidden_size] return contextualized_states # fmt: on From bb48e921868ac750417956de941606f7e2fa02ca Mon Sep 17 00:00:00 2001 From: Arthur <48595927+ArthurZucker@users.noreply.github.com> Date: Mon, 20 May 2024 13:56:11 +0200 Subject: [PATCH 12/80] `tokenizer_class = "AutoTokenizer"` Llava Family (#30912) propagate changes to more models --- src/transformers/models/llava_next/processing_llava_next.py | 4 ++-- src/transformers/models/video_llava/processing_video_llava.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/llava_next/processing_llava_next.py b/src/transformers/models/llava_next/processing_llava_next.py index 8a4b76e9c6..91cd544ab6 100644 --- a/src/transformers/models/llava_next/processing_llava_next.py +++ b/src/transformers/models/llava_next/processing_llava_next.py @@ -40,8 +40,8 @@ class LlavaNextProcessor(ProcessorMixin): """ attributes = ["image_processor", "tokenizer"] - image_processor_class = "LlavaNextImageProcessor" - tokenizer_class = ("LlamaTokenizer", "LlamaTokenizerFast") + image_processor_class = "AutoImageProcessor" + tokenizer_class = "AutoTokenizer" def __init__(self, image_processor=None, tokenizer=None): super().__init__(image_processor, tokenizer) diff --git a/src/transformers/models/video_llava/processing_video_llava.py b/src/transformers/models/video_llava/processing_video_llava.py index 0355d756ce..b71600501b 100644 --- a/src/transformers/models/video_llava/processing_video_llava.py +++ b/src/transformers/models/video_llava/processing_video_llava.py @@ -42,7 +42,7 @@ class VideoLlavaProcessor(ProcessorMixin): attributes = ["image_processor", "tokenizer"] image_processor_class = "VideoLlavaImageProcessor" - tokenizer_class = ("LlamaTokenizer", 
"LlamaTokenizerFast") + tokenizer_class = "AutoTokenizer" def __init__(self, image_processor=None, tokenizer=None): super().__init__(image_processor, tokenizer) From 92d1d97c05a01160d6e7fcf4198e93bf2cec0dfe Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Mon, 20 May 2024 09:21:40 -0400 Subject: [PATCH 13/80] Introduce configured_state arg for accelerator_config (#29781) * Introduce configured_state * Include note on tuning * Allow for users to have defined a state already * Include tests * Add note on hpam tune * Guard a bit better * Update src/transformers/training_args.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Update src/transformers/training_args.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Finish rebase * Finish rebase * Guard carefully * Fixup test * Refactor * Fin refactor * Comment * Update wrt feedback --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --- src/transformers/trainer_pt_utils.py | 14 +++ src/transformers/training_args.py | 134 ++++++++++++++++----------- tests/trainer/test_trainer.py | 14 +++ 3 files changed, 110 insertions(+), 52 deletions(-) diff --git a/src/transformers/trainer_pt_utils.py b/src/transformers/trainer_pt_utils.py index 9defa91b2b..8ac0281912 100644 --- a/src/transformers/trainer_pt_utils.py +++ b/src/transformers/trainer_pt_utils.py @@ -1250,6 +1250,10 @@ class AcceleratorConfig: Whether to use non-blocking CUDA calls to help minimize synchronization during distributed training with prepared `DataLoader` inputs being moved to device. Best if used with `pin_memory=True` in the `TrainingArguments`. + use_configured_state (`bool*, *optional*, defaults to `False`): + Whether or not to use a pre-configured `AcceleratorState` or `PartialState` defined + before calling `TrainingArguments`. If `True`, an `Accelerator` or `PartialState` + must be initialized. May lead to issues using sweeps or hyperparameter tuning. """ @@ -1312,6 +1316,13 @@ class AcceleratorConfig: " The [`accelerate.utils.GradientAccumulationPlugin`] default is `False`." }, ) + use_configured_state: bool = field( + default=False, + metadata={ + "help": "Whether or not to use a pre-configured `AcceleratorState` or `PartialState` defined before calling `TrainingArguments`." + "If `True`, an `Accelerator` or `PartialState` must be initialized. May lead to issues using sweeps or hyperparameter tuning." + }, + ) @classmethod def from_json_file(cls, json_file): @@ -1331,6 +1342,9 @@ class AcceleratorConfig: def to_dict(self): return copy.deepcopy(self.__dict__) + def pop(self, key, default=None): + return self.__dict__.pop(key, default) + class LayerWiseDummyOptimizer(torch.optim.Optimizer): """ diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 2807c9951a..08d9000cb6 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -572,6 +572,10 @@ class TrainingArguments: training results are fully reproducable using a different sampling technique. While seed-to-seed results may differ, on average the differences are neglible when using multiple different seeds to compare. Should also be ran with [`~utils.set_seed`] for the best results. + - use_configured_state (`bool`, *optional*, defaults to `False`): + Whether or not to use a pre-configured `AcceleratorState` or `PartialState` defined before calling `TrainingArguments`. + If `True`, an `Accelerator` or `PartialState` must be initialized. 
Note that by doing so, this could lead to issues + with hyperparameter tuning. label_smoothing_factor (`float`, *optional*, defaults to 0.0): The label smoothing factor to use. Zero means no label smoothing, otherwise the underlying onehot-encoded @@ -1635,6 +1639,39 @@ class TrainingArguments: if version.parse(version.parse(torch.__version__).base_version) == version.parse("2.0.0") and self.fp16: raise ValueError("--optim adamw_torch_fused with --fp16 requires PyTorch>2.0") + # We need to setup the accelerator config here *before* the first call to `self.device` + if is_accelerate_available(): + if not isinstance(self.accelerator_config, (AcceleratorConfig)): + if self.accelerator_config is None: + self.accelerator_config = AcceleratorConfig() + elif isinstance(self.accelerator_config, dict): + self.accelerator_config = AcceleratorConfig(**self.accelerator_config) + # Check that a user didn't pass in the class instantiator + # such as `accelerator_config = AcceleratorConfig` + elif isinstance(self.accelerator_config, type): + raise NotImplementedError( + "Tried passing in a callable to `accelerator_config`, but this is not supported. " + "Please pass in a fully constructed `AcceleratorConfig` object instead." + ) + else: + self.accelerator_config = AcceleratorConfig.from_json_file(self.accelerator_config) + + if self.dispatch_batches is not None: + warnings.warn( + "Using `--dispatch_batches` is deprecated and will be removed in version 4.41 of 🤗 Transformers. Use" + " `--accelerator_config {'dispatch_batches':VALUE} instead", + FutureWarning, + ) + self.accelerator_config.dispatch_batches = self.dispatch_batches + + if self.split_batches is not None: + warnings.warn( + "Using `--split_batches` is deprecated and will be removed in version 4.41 of 🤗 Transformers. Use" + " `--accelerator_config {'split_batches':VALUE} instead", + FutureWarning, + ) + self.accelerator_config.split_batches = self.split_batches + if ( self.framework == "pt" and is_torch_available() @@ -1873,37 +1910,6 @@ class TrainingArguments: os.environ[f"{prefix}USE_ORIG_PARAMS"] = str(self.fsdp_config.get("use_orig_params", "true")).lower() - if is_accelerate_available(): - if not isinstance(self.accelerator_config, (AcceleratorConfig)): - if self.accelerator_config is None: - self.accelerator_config = AcceleratorConfig() - elif isinstance(self.accelerator_config, dict): - self.accelerator_config = AcceleratorConfig(**self.accelerator_config) - # Check that a user didn't pass in the class instantiator - # such as `accelerator_config = AcceleratorConfig` - elif isinstance(self.accelerator_config, type): - raise NotImplementedError( - "Tried passing in a callable to `accelerator_config`, but this is not supported. " - "Please pass in a fully constructed `AcceleratorConfig` object instead." - ) - else: - self.accelerator_config = AcceleratorConfig.from_json_file(self.accelerator_config) - if self.dispatch_batches is not None: - warnings.warn( - "Using `--dispatch_batches` is deprecated and will be removed in version 4.41 of 🤗 Transformers. Use" - " `--accelerator_config {'dispatch_batches':VALUE} instead", - FutureWarning, - ) - self.accelerator_config.dispatch_batches = self.dispatch_batches - - if self.split_batches is not None: - warnings.warn( - "Using `--split_batches` is deprecated and will be removed in version 4.41 of 🤗 Transformers. 
Use" - " `--accelerator_config {'split_batches':VALUE} instead", - FutureWarning, - ) - self.accelerator_config.split_batches = self.split_batches - if self.tpu_metrics_debug: warnings.warn( "using `--tpu_metrics_debug` is deprecated and will be removed in version 5 of 🤗 Transformers. Use" @@ -2056,32 +2062,62 @@ class TrainingArguments: f"Using the `Trainer` with `PyTorch` requires `accelerate>={ACCELERATE_MIN_VERSION}`: " "Please run `pip install transformers[torch]` or `pip install accelerate -U`" ) + # We delay the init of `PartialState` to the end for clarity + accelerator_state_kwargs = {"enabled": True, "use_configured_state": False} + if isinstance(self.accelerator_config, AcceleratorConfig): + accelerator_state_kwargs["use_configured_state"] = self.accelerator_config.pop( + "use_configured_state", False + ) + if accelerator_state_kwargs["use_configured_state"]: + if PartialState._shared_state == {}: + raise ValueError( + "Passing `'use_configured_state':True` to the AcceleratorConfig requires a pre-configured " + "`AcceleratorState` or `PartialState` to be defined before calling `TrainingArguments`. " + ) + # We rely on `PartialState` to yell if there's issues here (which it will) + self.distributed_state = PartialState(cpu=self.use_cpu) + if self.deepspeed and self.distributed_state.distributed_type != DistributedType.DEEPSPEED: + raise RuntimeError( + "Tried to use an already configured `Accelerator` or `PartialState` that was not initialized for DeepSpeed, " + "but also passed in a `deepspeed` configuration to the `TrainingArguments`. Please set " + "`use_configured_state:False` instead or setup your `Accelerator` or `PartialState` properly." + ) + else: AcceleratorState._reset_state(reset_partial_state=True) - self.distributed_state = None + self.distributed_state = None if not self.use_ipex and "ACCELERATE_USE_IPEX" not in os.environ: os.environ["ACCELERATE_USE_IPEX"] = "false" + + self._n_gpu = 1 if self.use_cpu or strtobool(os.environ.get("ACCELERATE_USE_CPU", "False")): - self.distributed_state = PartialState(cpu=True, backend=self.ddp_backend) + accelerator_state_kwargs["cpu"] = True + accelerator_state_kwargs["backend"] = self.ddp_backend self._n_gpu = 0 elif is_sagemaker_mp_enabled(): + accelerator_state_kwargs["enabled"] = False local_rank = smp.local_rank() device = torch.device("cuda", local_rank) - self._n_gpu = 1 torch.cuda.set_device(device) elif is_sagemaker_dp_enabled(): - self.distributed_state = PartialState(_use_sagemaker_dp=True) - self._n_gpu = 1 + accelerator_state_kwargs["_use_sagemaker_dp"] = True elif self.deepspeed: - # Need to do similar for Accelerator init - os.environ["ACCELERATE_USE_DEEPSPEED"] = "true" - self.distributed_state = PartialState(timeout=timedelta(seconds=self.ddp_timeout)) - del os.environ["ACCELERATE_USE_DEEPSPEED"] - self._n_gpu = 1 + accelerator_state_kwargs["use_deepspeed"] = True + accelerator_state_kwargs["timeout"] = timedelta(seconds=self.ddp_timeout) else: - self.distributed_state = PartialState( - backend=self.ddp_backend, timeout=timedelta(seconds=self.ddp_timeout) - ) - self._n_gpu = 1 + accelerator_state_kwargs["backend"] = self.ddp_backend + accelerator_state_kwargs["timeout"] = timedelta(seconds=self.ddp_timeout) + + # Now we pop everything + if accelerator_state_kwargs.pop("enabled", False) and not accelerator_state_kwargs.pop( + "use_configured_state", False + ): + # We need to patch this env var when enabling to detect deepspeed + use_deepspeed = accelerator_state_kwargs.pop("use_deepspeed", False) + if 
use_deepspeed: + os.environ["ACCELERATE_USE_DEEPSPEED"] = "true" + self.distributed_state = PartialState(**accelerator_state_kwargs) + if use_deepspeed: + del os.environ["ACCELERATE_USE_DEEPSPEED"] if not is_sagemaker_mp_enabled(): device = self.distributed_state.device self.local_rank = self.distributed_state.local_process_index @@ -2108,23 +2144,17 @@ class TrainingArguments: "Either you do not have an MPS-enabled device on this machine or MacOS version is not 12.3+ " "or current PyTorch install was not built with MPS enabled." ) - if device.type == "mps": - self._n_gpu = 1 - elif self.use_cpu: + if self.use_cpu: device = torch.device("cpu") - self._n_gpu = 0 elif is_torch_xpu_available(): device = torch.device("xpu:0") torch.xpu.set_device(device) - self._n_gpu = 1 elif is_torch_mlu_available(): device = torch.device("mlu:0") torch.mlu.set_device(device) - self._n_gpu = 1 elif is_torch_npu_available(): device = torch.device("npu:0") torch.npu.set_device(device) - self._n_gpu = 1 else: # if n_gpu is > 1 we'll use nn.DataParallel. # If you only want to use a specific subset of GPUs use `CUDA_VISIBLE_DEVICES=0` diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index c420da4052..1711f600ce 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -131,6 +131,10 @@ if is_torch_available(): # for version specific tests in TrainerIntegrationTest require_accelerate_version_min_0_28 = partial(require_accelerate, min_version="0.28") GRAD_ACCUM_KWARGS_VERSION_AVAILABLE = is_accelerate_available("0.28") +if is_accelerate_available(): + from accelerate import Accelerator + from accelerate.state import AcceleratorState + PATH_SAMPLE_TEXT = f"{get_tests_dir()}/fixtures/sample_text.txt" @@ -3266,6 +3270,16 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon): trainer = Trainer(model=model, args=args, eval_dataset=eval_dataset) self.assertEqual(trainer.accelerator.split_batches, True) + def test_accelerator_custom_state(self): + AcceleratorState._reset_state(reset_partial_state=True) + with tempfile.TemporaryDirectory() as tmp_dir: + with self.assertRaises(ValueError) as cm: + _ = RegressionTrainingArguments(output_dir=tmp_dir, accelerator_config={"use_configured_state": True}) + self.assertIn("Please define this beforehand", str(cm.warnings[0].message)) + _ = Accelerator() + _ = RegressionTrainingArguments(output_dir=tmp_dir, accelerator_config={"use_configured_state": True}) + AcceleratorState._reset_state(reset_partial_state=True) + @require_accelerate_version_min_0_28 def test_accelerator_config_from_dict_grad_accum_num_steps(self): with tempfile.TemporaryDirectory() as tmp_dir: From 616bb11d487aabc231bb230b245c42214ea4b254 Mon Sep 17 00:00:00 2001 From: Longjie Zheng <32992656+zhenglongjiepheonix@users.noreply.github.com> Date: Mon, 20 May 2024 10:27:24 -0400 Subject: [PATCH 14/80] Add torch.compile for Mistral (#30642) * first version * fix sliding window * fix style * add sliding window cache * fix style * address comments * fix test * fix style * move sliding window check inside cache init * revert changes on irrelevant files & add comment on SlidingWindowCache * address comments & fix style fix style * update causal mask * [run-slow] mistral * [run-slow] mistral * [run-slow] mistral * [run-slow] mistral * [run-slow] mistral * [run-slow] llama * [run-slow] mistral * [run-slow] mistral * [run-slow] mistral * revert CI from a10 to t4 * wrap up --- docs/source/en/llm_optims.md | 2 +- src/transformers/cache_utils.py | 121 +++++ 
 src/transformers/generation/utils.py | 53 ++-
 .../open_llama/modeling_open_llama.py | 4 +-
 .../models/falcon/modeling_falcon.py | 4 +-
 .../models/gemma/modeling_gemma.py | 5 +-
 .../models/gpt_neox/modeling_gpt_neox.py | 4 +-
 .../modeling_gpt_neox_japanese.py | 2 +-
 .../models/idefics/modeling_idefics.py | 2 +-
 .../models/llama/modeling_llama.py | 2 -
 .../models/mistral/modeling_mistral.py | 421 +++++++++++-------
 .../models/mixtral/modeling_mixtral.py | 20 +-
 .../models/persimmon/modeling_persimmon.py | 4 +-
 src/transformers/models/phi/modeling_phi.py | 4 +-
 .../models/qwen2/modeling_qwen2.py | 6 +-
 .../models/qwen2_moe/modeling_qwen2_moe.py | 6 +-
 .../models/stablelm/modeling_stablelm.py | 4 +-
 .../models/starcoder2/modeling_starcoder2.py | 13 +-
 tests/models/mistral/test_modeling_mistral.py | 72 ++-
 19 files changed, 512 insertions(+), 237 deletions(-)

diff --git a/docs/source/en/llm_optims.md b/docs/source/en/llm_optims.md
index 4b44c1d78c..5e49f0e1eb 100644
--- a/docs/source/en/llm_optims.md
+++ b/docs/source/en/llm_optims.md
@@ -29,7 +29,7 @@ To optimize this, you can use a kv-cache to store the past keys and values inste
 The *static kv-cache* solves this issue by pre-allocating the kv-cache size to a maximum value which allows you to combine it with torch.compile for up to a 4x speed up.
 
 > [!WARNING]
-> Currently, only [Command R](./model_doc/cohere), [Gemma](./model_doc/gemma) and [Llama](./model_doc/llama2) models support static kv-cache and torch.compile.
+> Currently, only [Llama](./model_doc/llama2) and a few other models support static kv-cache and torch.compile. Check [this issue](https://github.com/huggingface/transformers/issues/28981) for a live model compatibility list.
 
 For this example, let's load the [Gemma](https://hf.co/google/gemma-2b) model.
 
diff --git a/src/transformers/cache_utils.py b/src/transformers/cache_utils.py
index 2e29e19ade..990a863e18 100644
--- a/src/transformers/cache_utils.py
+++ b/src/transformers/cache_utils.py
@@ -448,3 +448,124 @@ class StaticCache(Cache):
         # In-place ops prevent breaking the static address
         self.key_cache[layer_idx].zero_()
         self.value_cache[layer_idx].zero_()
+
+
+class SlidingWindowCache(Cache):
+    """
+    Sliding Window Cache class to be used with `torch.compile` for models like Mistral that support sliding window attention.
+    Every time we try to update the cache, we compute the `indices` based on `cache_position >= self.config.sliding_window_size - 1`,
+    if true (which means the cache cannot hold all the old key value states and new states together because of the sliding window constraint),
+    we need to do a cycle shift based on `indices` to replace the oldest states with the new key value states passed in.
+
+    The `to_shift` is only true once we are above sliding_window_size. Thus with `sliding_window_size==64`:
+
+    indices = (slicing + to_shift[-1].int()-1) % self.config.sliding_window_size
+    tensor([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+    19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
+    37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
+    55, 56, 57, 58, 59, 60, 61, 62, 63, 0])
+
+    We overwrite the cache using these indices, then we always write at cache_position (clamped to `sliding_window_size`).
+
+    Parameters:
+        config (`PretrainedConfig`):
+            The configuration file defining the shape-related attributes required to initialize the static cache.
+        max_batch_size (`int`):
+            The maximum batch size with which the model will be used.
+ max_cache_len (`int`): + The maximum sequence length with which the model will be used. + device (`torch.device`): + The device on which the cache should be initialized. Should be the same as the layer. + dtype (*optional*, defaults to `torch.float32`): + The default `dtype` to use when initializing the layer. + """ + + def __init__(self, config: PretrainedConfig, max_batch_size: int, max_cache_len: int, device, dtype=None) -> None: + if not hasattr(config, "sliding_window") or config.sliding_window is None: + raise ValueError( + "Setting `cache_implementation` to 'sliding_window' requires the model config supporting " + "sliding window attention, please check if there is a `sliding_window` field in the model " + "config and it's not set to None." + ) + + super().__init__() + self.max_batch_size = max_batch_size + # take the minimum of max_cache_len and config.sliding_window so that we allocate less memory + # when we do short-sentence generation + self.max_cache_len = config.max_position_embeddings if max_cache_len is None else max_cache_len + self.model_sliding_window_size = config.sliding_window + self.sliding_window_size = min(self.max_cache_len, self.model_sliding_window_size) + # Some model define a custom `head_dim` != config.hidden_size // config.num_attention_heads + self.head_dim = ( + config.head_dim if hasattr(config, "head_dim") else config.hidden_size // config.num_attention_heads + ) + + self.dtype = dtype if dtype is not None else torch.float32 + self.num_key_value_heads = ( + config.num_attention_heads if config.num_key_value_heads is None else config.num_key_value_heads + ) + + cache_shape = ( + config.num_hidden_layers, + max_batch_size, + self.num_key_value_heads, + self.sliding_window_size, + self.head_dim, + ) + + self.key_cache = torch.zeros(cache_shape, dtype=self.dtype, device=device) + self.value_cache = torch.zeros(cache_shape, dtype=self.dtype, device=device) + + torch._dynamo.mark_static_address(self.key_cache) + torch._dynamo.mark_static_address(self.value_cache) + + def update( + self, + key_states: torch.Tensor, + value_states: torch.Tensor, + layer_idx: int, + cache_kwargs: Optional[Dict[str, Any]] = None, + ) -> Tuple[torch.Tensor]: + cache_position = cache_kwargs.get("cache_position") + k_out = self.key_cache[layer_idx] + v_out = self.value_cache[layer_idx] + + # assume this only happens in prefill phase when prompt length > sliding_window_size + if cache_position.shape[0] > self.sliding_window_size: + k_out = key_states[:, :, -self.sliding_window_size :, :] + v_out = value_states[:, :, -self.sliding_window_size :, :] + self.key_cache[layer_idx] = k_out + self.value_cache[layer_idx] = v_out + # we should return the whole states instead of k_out, v_out to take the whole prompt + # into consideration when building kv cache instead of just throwing away tokens outside of the window + return key_states, value_states + + slicing = torch.ones(self.sliding_window_size, dtype=torch.long, device=value_states.device).cumsum(0) + cache_position = cache_position.clamp(0, self.sliding_window_size - 1) + to_shift = cache_position >= self.sliding_window_size - 1 + indices = (slicing + to_shift[-1].int() - 1) % self.sliding_window_size + + k_out = k_out[:, :, indices] + v_out = v_out[:, :, indices] + + k_out[:, :, cache_position] = key_states + v_out[:, :, cache_position] = value_states + + self.key_cache[layer_idx] = k_out + self.value_cache[layer_idx] = v_out + + return k_out, v_out + + def get_seq_length(self, layer_idx: Optional[int] = 0) -> int: + # assume this 
will be called only in the first generation step
+        # `cache_position` will be used in other cases
+        return 0
+
+    def get_max_length(self) -> Optional[int]:
+        # in theory there is no limit because the sliding window size is fixed
+        # no matter how long the sentence is
+        return None
+
+    def reset(self):
+        self.key_cache.zero_()
+        self.value_cache.zero_()
diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py
index 4b71fe9519..bfaa665979 100644
--- a/src/transformers/generation/utils.py
+++ b/src/transformers/generation/utils.py
@@ -24,7 +24,7 @@ import torch
 import torch.distributed as dist
 from torch import nn
 
-from ..cache_utils import Cache, DynamicCache, StaticCache
+from ..cache_utils import Cache, DynamicCache, SlidingWindowCache, StaticCache
 from ..integrations.deepspeed import is_deepspeed_zero3_enabled
 from ..modeling_outputs import CausalLMOutputWithPast, Seq2SeqLMOutput
 from ..models.auto import (
@@ -96,9 +96,7 @@ logger = logging.get_logger(__name__)
 if is_accelerate_available():
     from accelerate.hooks import AlignDevicesHook, add_hook_to_module
 
-NEED_SETUP_CACHE_CLASSES_MAPPING = {
-    "static": StaticCache,
-}
+NEED_SETUP_CACHE_CLASSES_MAPPING = {"static": StaticCache, "sliding_window": SlidingWindowCache}
 
 @dataclass
@@ -1326,24 +1324,33 @@ class GenerationMixin:
             model_kwargs["cache_position"] = torch.arange(past_length, cur_len, device=input_ids.device)
         return model_kwargs
 
-    def _get_static_cache(self, max_batch_size: int, max_cache_len: int) -> StaticCache:
+    def _get_cache(self, cache_implementation: str, max_batch_size: int, max_cache_len: int) -> Cache:
         """
-        Sets a static cache for `generate`, that will persist across calls. A new cache will only be initialized a
+        Sets a cache for `generate` that will persist across calls. A new cache will only be initialized if a
         new `generate` call requires a larger cache.
 
-        Returns the resulting static cache object.
+        Returns the resulting cache object.
         """
-        needs_new_cache = (
-            not hasattr(self, "_static_cache")
-            or self._static_cache.max_batch_size < max_batch_size
-            or self._static_cache.max_cache_len < max_cache_len
+        cache_cls: Cache = NEED_SETUP_CACHE_CLASSES_MAPPING[cache_implementation]
+        need_new_cache = (
+            not hasattr(self, "_cache")
+            or (not isinstance(self._cache, cache_cls))
+            or self._cache.max_batch_size < max_batch_size
         )
+        if cache_implementation == "sliding_window":
+            need_new_cache = need_new_cache or (
+                self._cache.sliding_window_size < self._cache.model_sliding_window_size
+                and max_cache_len > self._cache.max_cache_len
+            )
+        elif cache_implementation == "static":
+            need_new_cache = need_new_cache or self._cache.max_cache_len < max_cache_len
+
+        if need_new_cache:
             if hasattr(self.config, "_pre_quantization_dtype"):
                 cache_dtype = self.config._pre_quantization_dtype
             else:
                 cache_dtype = self.dtype
-            self._static_cache = StaticCache(
+            self._cache = cache_cls(
                 config=self.config,
                 max_batch_size=max_batch_size,
                 max_cache_len=max_cache_len,
@@ -1351,8 +1358,8 @@ class GenerationMixin:
                 dtype=cache_dtype,
             )
         else:
-            self._static_cache.reset()  # reset the cache for a new generation
-        return self._static_cache
+            self._cache.reset()
+        return self._cache
 
     def _prepare_special_tokens(
         self,
@@ -1615,14 +1622,14 @@ class GenerationMixin:
                     "This model does not support the `cache_implementation` argument. Please check the following "
                     "issue: https://github.com/huggingface/transformers/issues/28981."
) - if generation_config.cache_implementation == "static": - if not self._supports_static_cache: - raise ValueError( - "This model does not support `cache_implementation='static'`. Please check the following " - "issue: https://github.com/huggingface/transformers/issues/28981" - ) - model_kwargs["past_key_values"] = self._get_static_cache(batch_size, generation_config.max_length) - + if generation_config.cache_implementation == "static" and not self._supports_static_cache: + raise ValueError( + "This model does not support `cache_implementation='static'`. Please check the following " + "issue: https://github.com/huggingface/transformers/issues/28981" + ) + model_kwargs["past_key_values"] = self._get_cache( + generation_config.cache_implementation, batch_size, generation_config.max_length + ) self._validate_generated_length(generation_config, input_ids_length, has_default_max_length) # 7. determine generation mode diff --git a/src/transformers/models/deprecated/open_llama/modeling_open_llama.py b/src/transformers/models/deprecated/open_llama/modeling_open_llama.py index 098f8c7da5..4e42d716e8 100644 --- a/src/transformers/models/deprecated/open_llama/modeling_open_llama.py +++ b/src/transformers/models/deprecated/open_llama/modeling_open_llama.py @@ -63,7 +63,7 @@ class OpenLlamaRMSNorm(nn.Module): return self.weight * hidden_states.to(input_dtype) -# Copied from transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding with Mistral->OpenLlama +# Copied from transformers.models.mixtral.modeling_mixtral.MixtralRotaryEmbedding with Mixtral->OpenLlama class OpenLlamaRotaryEmbedding(nn.Module): def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() @@ -154,7 +154,7 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -# Copied from transformers.models.mistral.modeling_mistral.apply_rotary_pos_emb +# Copied from transformers.models.mixtral.modeling_mixtral.apply_rotary_pos_emb def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. diff --git a/src/transformers/models/falcon/modeling_falcon.py b/src/transformers/models/falcon/modeling_falcon.py index b9fbf8d70b..75346601d7 100644 --- a/src/transformers/models/falcon/modeling_falcon.py +++ b/src/transformers/models/falcon/modeling_falcon.py @@ -81,7 +81,7 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -# Copied from transformers.models.mistral.modeling_mistral.apply_rotary_pos_emb +# Copied from transformers.models.mixtral.modeling_mixtral.apply_rotary_pos_emb def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. 
@@ -123,7 +123,7 @@ def _get_unpad_data(attention_mask): ) -# Copied from transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding with Mistral->Falcon +# Copied from transformers.models.mixtral.modeling_mixtral.MixtralRotaryEmbedding with Mixtral->Falcon class FalconRotaryEmbedding(nn.Module): def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() diff --git a/src/transformers/models/gemma/modeling_gemma.py b/src/transformers/models/gemma/modeling_gemma.py index 200f91f750..6003c91cee 100644 --- a/src/transformers/models/gemma/modeling_gemma.py +++ b/src/transformers/models/gemma/modeling_gemma.py @@ -253,7 +253,6 @@ class GemmaAttention(nn.Module): output_attentions: bool = False, use_cache: bool = False, cache_position: Optional[torch.LongTensor] = None, - **kwargs, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: bsz, q_len, _ = hidden_states.size() @@ -265,8 +264,8 @@ class GemmaAttention(nn.Module): key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) - cos, sin = self.rotary_emb(value_states, position_ids, seq_len=None) - query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, None) + cos, sin = self.rotary_emb(value_states, position_ids) + query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin) if past_key_value is not None: # sin and cos are specific to RoPE models; cache_position needed for the static cache diff --git a/src/transformers/models/gpt_neox/modeling_gpt_neox.py b/src/transformers/models/gpt_neox/modeling_gpt_neox.py index e0b2309fc9..4980f7c636 100755 --- a/src/transformers/models/gpt_neox/modeling_gpt_neox.py +++ b/src/transformers/models/gpt_neox/modeling_gpt_neox.py @@ -522,7 +522,7 @@ def attention_mask_func(attention_scores, ltor_mask): class GPTNeoXRotaryEmbedding(nn.Module): - # Copied from transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding.__init__ + # Copied from transformers.models.mixtral.modeling_mixtral.MixtralRotaryEmbedding.__init__ def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() @@ -614,7 +614,7 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -# Copied from transformers.models.mistral.modeling_mistral.apply_rotary_pos_emb +# Copied from transformers.models.mixtral.modeling_mixtral.apply_rotary_pos_emb def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. 
diff --git a/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py b/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py index ea934581aa..24d2113178 100755 --- a/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +++ b/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py @@ -230,7 +230,7 @@ class GPTNeoXJapaneseAttention(nn.Module): # Copied from transformers.models.gpt_neox.modeling_gpt_neox.GPTNeoXRotaryEmbedding with GPTNeoXRotaryEmbedding->RotaryEmbedding class RotaryEmbedding(nn.Module): - # Copied from transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding.__init__ + # Copied from transformers.models.mixtral.modeling_mixtral.MixtralRotaryEmbedding.__init__ def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() diff --git a/src/transformers/models/idefics/modeling_idefics.py b/src/transformers/models/idefics/modeling_idefics.py index 68ec6ab8a5..2ed51413d9 100644 --- a/src/transformers/models/idefics/modeling_idefics.py +++ b/src/transformers/models/idefics/modeling_idefics.py @@ -478,7 +478,7 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -# Copied from transformers.models.mistral.modeling_mistral.apply_rotary_pos_emb +# Copied from transformers.models.mixtral.modeling_mixtral.apply_rotary_pos_emb def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. diff --git a/src/transformers/models/llama/modeling_llama.py b/src/transformers/models/llama/modeling_llama.py index 5d8a3f987a..1f4f6ac9a0 100644 --- a/src/transformers/models/llama/modeling_llama.py +++ b/src/transformers/models/llama/modeling_llama.py @@ -303,7 +303,6 @@ class LlamaAttention(nn.Module): output_attentions: bool = False, use_cache: bool = False, cache_position: Optional[torch.LongTensor] = None, - **kwargs, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: bsz, q_len, _ = hidden_states.size() @@ -402,7 +401,6 @@ class LlamaFlashAttention2(LlamaAttention): output_attentions: bool = False, use_cache: bool = False, cache_position: Optional[torch.LongTensor] = None, - **kwargs, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: if isinstance(past_key_value, StaticCache): raise ValueError( diff --git a/src/transformers/models/mistral/modeling_mistral.py b/src/transformers/models/mistral/modeling_mistral.py index a524c5958e..c54b8774ee 100644 --- a/src/transformers/models/mistral/modeling_mistral.py +++ b/src/transformers/models/mistral/modeling_mistral.py @@ -17,7 +17,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch Mistral model.""" +"""PyTorch Mistral model.""" + import inspect import math from typing import List, Optional, Tuple, Union @@ -29,8 +30,8 @@ from torch import nn from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss from ...activations import ACT2FN -from ...cache_utils import Cache, DynamicCache -from ...modeling_attn_mask_utils import _prepare_4d_causal_attention_mask, _prepare_4d_causal_attention_mask_for_sdpa +from ...cache_utils import Cache, DynamicCache, SlidingWindowCache, StaticCache +from ...modeling_attn_mask_utils import AttentionMaskConverter from ...modeling_outputs import ( BaseModelOutputWithPast, CausalLMOutputWithPast, @@ -92,8 +93,6 @@ class MistralRMSNorm(nn.Module): return self.weight * hidden_states.to(input_dtype) -# copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding with Llama->Mistral -# TODO @Arthur no longer copied from LLama after static cache class MistralRotaryEmbedding(nn.Module): def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() @@ -104,30 +103,22 @@ class MistralRotaryEmbedding(nn.Module): inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2, dtype=torch.int64).float().to(device) / self.dim)) self.register_buffer("inv_freq", inv_freq, persistent=False) - # Build here to make `torch.jit.trace` work. - self._set_cos_sin_cache( - seq_len=max_position_embeddings, device=self.inv_freq.device, dtype=torch.get_default_dtype() - ) - - def _set_cos_sin_cache(self, seq_len, device, dtype): - self.max_seq_len_cached = seq_len - t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.int64).type_as(self.inv_freq) - - freqs = torch.outer(t, self.inv_freq) - # Different from paper, but it uses a different permutation in order to obtain the same calculation - emb = torch.cat((freqs, freqs), dim=-1) - self.register_buffer("cos_cached", emb.cos().to(dtype), persistent=False) - self.register_buffer("sin_cached", emb.sin().to(dtype), persistent=False) - - def forward(self, x, seq_len=None): + @torch.no_grad() + # Copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding.forward + def forward(self, x, position_ids): # x: [bs, num_attention_heads, seq_len, head_size] - if seq_len > self.max_seq_len_cached: - self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype) - - return ( - self.cos_cached[:seq_len].to(dtype=x.dtype), - self.sin_cached[:seq_len].to(dtype=x.dtype), - ) + inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1) + position_ids_expanded = position_ids[:, None, :].float() + # Force float32 since bfloat16 loses precision on long contexts + # See https://github.com/huggingface/transformers/pull/29285 + device_type = x.device.type + device_type = device_type if isinstance(device_type, str) and device_type != "mps" else "cpu" + with torch.autocast(device_type=device_type, enabled=False): + freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2) + emb = torch.cat((freqs, freqs), dim=-1) + cos = emb.cos() + sin = emb.sin() + return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype) # Copied from transformers.models.llama.modeling_llama.rotate_half @@ -138,9 +129,8 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -# copied from transformers.models.llama.modeling_llama.apply_rotary_pos_emb -# TODO @Arthur no longer copied from LLama after static cache -def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): +# Copied from 
transformers.models.llama.modeling_llama.apply_rotary_pos_emb +def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. Args: @@ -148,9 +138,8 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): k (`torch.Tensor`): The key tensor. cos (`torch.Tensor`): The cosine part of the rotary embedding. sin (`torch.Tensor`): The sine part of the rotary embedding. - position_ids (`torch.Tensor`): - The position indices of the tokens corresponding to the query and key tensors. For example, this can be - used to pass offsetted position ids when working with a KV-cache. + position_ids (`torch.Tensor`, *optional*): + Deprecated and unused. unsqueeze_dim (`int`, *optional*, defaults to 1): The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note @@ -161,8 +150,8 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): Returns: `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding. """ - cos = cos[position_ids].unsqueeze(unsqueeze_dim) - sin = sin[position_ids].unsqueeze(unsqueeze_dim) + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) q_embed = (q * cos) + (rotate_half(q) * sin) k_embed = (k * cos) + (rotate_half(k) * sin) return q_embed, k_embed @@ -213,6 +202,7 @@ class MistralAttention(nn.Module): "when creating this class." ) + self.attention_dropout = config.attention_dropout self.hidden_size = config.hidden_size self.num_heads = config.num_attention_heads self.head_dim = self.hidden_size // self.num_heads @@ -221,7 +211,6 @@ class MistralAttention(nn.Module): self.max_position_embeddings = config.max_position_embeddings self.rope_theta = config.rope_theta self.is_causal = True - self.attention_dropout = config.attention_dropout if (self.head_dim * self.num_heads) != self.hidden_size: raise ValueError( @@ -231,7 +220,7 @@ class MistralAttention(nn.Module): self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False) self.k_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=False) self.v_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=False) - self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False) + self.o_proj = nn.Linear(self.hidden_size, self.hidden_size, bias=False) self.rotary_emb = MistralRotaryEmbedding( self.head_dim, @@ -239,9 +228,7 @@ class MistralAttention(nn.Module): base=self.rope_theta, ) - def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): - return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous() - + # Copied from transformers.models.gemma.modeling_gemma.GemmaAttention.forward with Gemma->Mistral def forward( self, hidden_states: torch.Tensor, @@ -250,6 +237,7 @@ class MistralAttention(nn.Module): past_key_value: Optional[Cache] = None, output_attentions: bool = False, use_cache: bool = False, + cache_position: Optional[torch.LongTensor] = None, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: bsz, q_len, _ = hidden_states.size() @@ -261,41 +249,22 @@ class MistralAttention(nn.Module): key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) value_states = value_states.view(bsz, q_len, 
self.num_key_value_heads, self.head_dim).transpose(1, 2) - kv_seq_len = key_states.shape[-2] - if past_key_value is not None: - if self.layer_idx is None: - raise ValueError( - f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} " - "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class " - "with a layer index." - ) - kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx) - cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) - query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids) + cos, sin = self.rotary_emb(value_states, position_ids) + query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin) if past_key_value is not None: - cache_kwargs = {"sin": sin, "cos": cos} # Specific to RoPE models + # sin and cos are specific to RoPE models; cache_position needed for the static cache + cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position} key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs) - # repeat k/v heads if n_kv_heads < n_heads key_states = repeat_kv(key_states, self.num_key_value_groups) value_states = repeat_kv(value_states, self.num_key_value_groups) attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim) - if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - raise ValueError( - f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is" - f" {attn_weights.size()}" - ) - - if attention_mask is not None: - if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - raise ValueError( - f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}" - ) - - attn_weights = attn_weights + attention_mask + if attention_mask is not None: # no matter the length, we just slice it + causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] + attn_weights = attn_weights + causal_mask # upcast attention to fp32 attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) @@ -309,8 +278,8 @@ class MistralAttention(nn.Module): ) attn_output = attn_output.transpose(1, 2).contiguous() - attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) + attn_output = attn_output.view(bsz, q_len, -1) attn_output = self.o_proj(attn_output) if not output_attentions: @@ -343,7 +312,16 @@ class MistralFlashAttention2(MistralAttention): past_key_value: Optional[Cache] = None, output_attentions: bool = False, use_cache: bool = False, + cache_position: Optional[torch.LongTensor] = None, ): + if isinstance(past_key_value, StaticCache): + raise ValueError( + "`static` cache implementation is not compatible with `attn_implementation==flash_attention_2` " + "make sure to use `sdpa` in the mean time, and open an issue at https://github.com/huggingface/transformers" + ) + + output_attentions = False + bsz, q_len, _ = hidden_states.size() query_states = self.q_proj(hidden_states) @@ -356,19 +334,10 @@ class MistralFlashAttention2(MistralAttention): kv_seq_len = key_states.shape[-2] if past_key_value is not None: - if self.layer_idx is None: - raise ValueError( - f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} " - "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class " - "with a layer index." 
- ) - kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx) + kv_seq_len += cache_position[0] - # Because the input can be padded, the absolute sequence length depends on the max position id. - rotary_seq_len = max(kv_seq_len, position_ids[:, -1].max().item()) + 1 - cos, sin = self.rotary_emb(value_states, seq_len=rotary_seq_len) - - query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids) + cos, sin = self.rotary_emb(value_states, position_ids) + query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin) use_sliding_windows = ( _flash_supports_window_size @@ -605,8 +574,7 @@ class MistralFlashAttention2(MistralAttention): ) -# copied from transformers.models.llama.modeling_llama.LlamaSdpaAttention with Llama->Mistral -# TODO @Arthur no longer copied from LLama after static cache +# Copied from transformers.models.llama.modeling_llama.LlamaSdpaAttention with Llama->Mistral class MistralSdpaAttention(MistralAttention): """ Mistral attention module using torch.nn.functional.scaled_dot_product_attention. This module inherits from @@ -623,6 +591,7 @@ class MistralSdpaAttention(MistralAttention): past_key_value: Optional[Cache] = None, output_attentions: bool = False, use_cache: bool = False, + cache_position: Optional[torch.LongTensor] = None, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: if output_attentions: # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented. @@ -637,6 +606,7 @@ class MistralSdpaAttention(MistralAttention): past_key_value=past_key_value, output_attentions=output_attentions, use_cache=use_cache, + cache_position=cache_position, ) bsz, q_len, _ = hidden_states.size() @@ -649,43 +619,38 @@ class MistralSdpaAttention(MistralAttention): key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) - kv_seq_len = key_states.shape[-2] - if past_key_value is not None: - kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx) - cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) + cos, sin = self.rotary_emb(value_states, position_ids) - query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids) + query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin) if past_key_value is not None: - cache_kwargs = {"sin": sin, "cos": cos} # Specific to RoPE models + # sin and cos are specific to RoPE models; cache_position needed for the static cache + cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position} key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs) key_states = repeat_kv(key_states, self.num_key_value_groups) value_states = repeat_kv(value_states, self.num_key_value_groups) + causal_mask = attention_mask if attention_mask is not None: - if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - raise ValueError( - f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}" - ) + causal_mask = causal_mask[:, :, :, : key_states.shape[-2]] # SDPA with memory-efficient backend is currently (torch==2.1.2) bugged with non-contiguous inputs with custom attn_mask, # Reference: https://github.com/pytorch/pytorch/issues/112577. 
- if query_states.device.type == "cuda" and attention_mask is not None: + if query_states.device.type == "cuda" and causal_mask is not None: query_states = query_states.contiguous() key_states = key_states.contiguous() value_states = value_states.contiguous() # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. - # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. - is_causal = True if self.is_causal and attention_mask is None and q_len > 1 else False + is_causal = True if causal_mask is None and q_len > 1 else False attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, key_states, value_states, - attn_mask=attention_mask, + attn_mask=causal_mask, dropout_p=self.attention_dropout if self.training else 0.0, is_causal=is_causal, ) @@ -705,12 +670,13 @@ MISTRAL_ATTENTION_CLASSES = { } +# Copied from transformers.models.llama.modeling_llama.LlamaDecoderLayer with Llama->Mistral, LLAMA->MISTRAL class MistralDecoderLayer(nn.Module): def __init__(self, config: MistralConfig, layer_idx: int): super().__init__() self.hidden_size = config.hidden_size - self.self_attn = MISTRAL_ATTENTION_CLASSES[config._attn_implementation](config, layer_idx) + self.self_attn = MISTRAL_ATTENTION_CLASSES[config._attn_implementation](config=config, layer_idx=layer_idx) self.mlp = MistralMLP(config) self.input_layernorm = MistralRMSNorm(config.hidden_size, eps=config.rms_norm_eps) @@ -721,15 +687,17 @@ class MistralDecoderLayer(nn.Module): hidden_states: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_value: Optional[Tuple[torch.Tensor]] = None, + past_key_value: Optional[Cache] = None, output_attentions: Optional[bool] = False, use_cache: Optional[bool] = False, + cache_position: Optional[torch.LongTensor] = None, ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]: """ Args: hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` - attention_mask (`torch.FloatTensor`, *optional*): attention mask of size - `(batch, sequence_length)` where padding elements are indicated by 0. + attention_mask (`torch.FloatTensor`, *optional*): + attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1, + query_sequence_length, key_sequence_length)` if default attention is used. output_attentions (`bool`, *optional*): Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned tensors for more detail. 
@@ -751,6 +719,7 @@ class MistralDecoderLayer(nn.Module): past_key_value=past_key_value, output_attentions=output_attentions, use_cache=use_cache, + cache_position=cache_position, ) hidden_states = residual + hidden_states @@ -801,6 +770,7 @@ class MistralPreTrainedModel(PreTrainedModel): _supports_flash_attn_2 = True _supports_sdpa = True _supports_cache_class = True + _supports_static_cache = True def _init_weights(self, module): std = self.config.initializer_range @@ -924,12 +894,13 @@ class MistralModel(MistralPreTrainedModel): input_ids: torch.LongTensor = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, + past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None, ) -> Union[Tuple, BaseModelOutputWithPast]: output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -940,74 +911,38 @@ class MistralModel(MistralPreTrainedModel): return_dict = return_dict if return_dict is not None else self.config.use_return_dict # retrieve input_ids and inputs_embeds - if input_ids is not None and inputs_embeds is not None: - raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time") - elif input_ids is not None: - batch_size, seq_length = input_ids.shape - elif inputs_embeds is not None: - batch_size, seq_length, _ = inputs_embeds.shape - else: - raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds") - - if self.gradient_checkpointing and self.training: - if use_cache: - logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." - ) - use_cache = False - - past_key_values_length = 0 - - if use_cache: - use_legacy_cache = not isinstance(past_key_values, Cache) - if use_legacy_cache: - past_key_values = DynamicCache.from_legacy_cache(past_key_values) - past_key_values_length = past_key_values.get_usable_length(seq_length) - - if position_ids is None: - device = input_ids.device if input_ids is not None else inputs_embeds.device - position_ids = torch.arange( - past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device + if (input_ids is None) ^ (inputs_embeds is not None): + raise ValueError( + "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one" ) - position_ids = position_ids.unsqueeze(0).view(-1, seq_length) - else: - position_ids = position_ids.view(-1, seq_length).long() + + if self.gradient_checkpointing and self.training and use_cache: + logger.warning_once( + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." + ) + use_cache = False if inputs_embeds is None: inputs_embeds = self.embed_tokens(input_ids) - if attention_mask is not None and self._attn_implementation == "flash_attention_2" and use_cache: - is_padding_right = attention_mask[:, -1].sum().item() != batch_size - if is_padding_right: - raise ValueError( - "You are attempting to perform batched generation with padding_side='right'" - " this may lead to unexpected behaviour for Flash Attention version of Mistral. 
Make sure to " - " call `tokenizer.padding_side = 'left'` before tokenizing the input. " - ) + return_legacy_cache = False + if use_cache and not isinstance(past_key_values, Cache): + past_key_values = DynamicCache.from_legacy_cache(past_key_values) + return_legacy_cache = True - if self._attn_implementation == "flash_attention_2": - # 2d mask is passed through the layers - attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None - elif self._attn_implementation == "sdpa" and not output_attentions: - # output_attentions=True can not be supported when using SDPA, and we fall back on - # the manual implementation that requires a 4D causal mask in all cases. - attention_mask = _prepare_4d_causal_attention_mask_for_sdpa( - attention_mask, - (batch_size, seq_length), - inputs_embeds, - past_key_values_length, - sliding_window=self.config.sliding_window, - ) - else: - # 4d mask is passed through the layers - attention_mask = _prepare_4d_causal_attention_mask( - attention_mask, - (batch_size, seq_length), - inputs_embeds, - past_key_values_length, - sliding_window=self.config.sliding_window, + if cache_position is None: + past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 + cache_position = torch.arange( + past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device ) + if position_ids is None: + position_ids = cache_position.unsqueeze(0) + + causal_mask = self._update_causal_mask( + attention_mask, inputs_embeds, cache_position, past_key_values, use_cache, output_attentions + ) + hidden_states = inputs_embeds # decoder layers @@ -1023,20 +958,22 @@ class MistralModel(MistralPreTrainedModel): layer_outputs = self._gradient_checkpointing_func( decoder_layer.__call__, hidden_states, - attention_mask, + causal_mask, position_ids, past_key_values, output_attentions, use_cache, + cache_position, ) else: layer_outputs = decoder_layer( hidden_states, - attention_mask=attention_mask, + attention_mask=causal_mask, position_ids=position_ids, past_key_value=past_key_values, output_attentions=output_attentions, use_cache=use_cache, + cache_position=cache_position, ) hidden_states = layer_outputs[0] @@ -1053,9 +990,9 @@ class MistralModel(MistralPreTrainedModel): if output_hidden_states: all_hidden_states += (hidden_states,) - next_cache = None - if use_cache: - next_cache = next_decoder_cache.to_legacy_cache() if use_legacy_cache else next_decoder_cache + next_cache = next_decoder_cache if use_cache else None + if return_legacy_cache: + next_cache = next_cache.to_legacy_cache() if not return_dict: return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None) @@ -1066,6 +1003,113 @@ class MistralModel(MistralPreTrainedModel): attentions=all_self_attns, ) + def _update_causal_mask( + self, + attention_mask: torch.Tensor, + input_tensor: torch.Tensor, + cache_position: torch.Tensor, + past_key_values: Cache, + use_cache: bool, + output_attentions: bool, + ): + # TODO: As of torch==2.2.0, the `attention_mask` passed to the model in `generate` is 2D and of dynamic length even when the static + # KV cache is used. This is an issue for torch.compile which then recaptures cudagraphs at each decode steps due to the dynamic shapes. + # (`recording cudagraph tree for symint key 13`, etc.), which is VERY slow. A workaround is `@torch.compiler.disable`, but this prevents using + # `fullgraph=True`. 
See more context in https://github.com/huggingface/transformers/pull/29114 + + if self._attn_implementation == "flash_attention_2": + if attention_mask is not None and use_cache: + is_padding_right = attention_mask[:, -1].sum().item() != input_tensor.size()[0] + if is_padding_right: + raise ValueError( + "You are attempting to perform batched generation with padding_side='right'" + " this may lead to unexpected behaviour for Flash Attention version of Mistral. Make sure to " + " call `tokenizer.padding_side = 'left'` before tokenizing the input. " + ) + if attention_mask is not None and 0.0 in attention_mask: + return attention_mask + return None + + # For SDPA, when possible, we will rely on its `is_causal` argument instead of its `attn_mask` argument, in + # order to dispatch on Flash Attention 2. This feature is not compatible with static cache, as SDPA will fail + # to infer the attention mask. + + # cache_position must be valid here no matter which cache we use + past_seen_tokens = cache_position[0] if past_key_values is not None else 0 + using_static_cache = isinstance(past_key_values, StaticCache) + using_sliding_window_cache = isinstance(past_key_values, SlidingWindowCache) + + if ( + self.config._attn_implementation == "sdpa" + and not (using_static_cache or using_sliding_window_cache) + and not output_attentions + ): + if AttentionMaskConverter._ignore_causal_mask_sdpa( + attention_mask, + inputs_embeds=input_tensor, + past_key_values_length=past_seen_tokens, + sliding_window=self.config.sliding_window, + is_training=self.training, + ): + return None + + dtype, device = input_tensor.dtype, input_tensor.device + min_dtype = torch.finfo(dtype).min + sequence_length = input_tensor.shape[1] + # SlidingWindowCache + if using_sliding_window_cache: + target_length = max(sequence_length, self.config.sliding_window) + # StaticCache + elif using_static_cache: + target_length = past_key_values.get_max_length() + # DynamicCache or no cache + else: + target_length = ( + attention_mask.shape[-1] + if isinstance(attention_mask, torch.Tensor) + else past_seen_tokens + sequence_length + 1 + ) + + if attention_mask is not None and attention_mask.dim() == 4: + # in this case we assume that the mask comes already in inverted form and requires no inversion or slicing + if attention_mask.max() != 0: + raise ValueError("Custom 4D attention mask should be passed in inverted form with max==0`") + causal_mask = attention_mask + else: + causal_mask = torch.full( + (sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device + ) + exclude_mask = torch.arange(target_length, device=device) > cache_position.reshape(-1, 1) + if self.config.sliding_window is not None: + if not using_sliding_window_cache or sequence_length > self.config.sliding_window: + exclude_mask |= torch.arange(target_length, device=device) <= ( + cache_position.reshape(-1, 1) - self.config.sliding_window + ) + causal_mask *= exclude_mask + causal_mask = causal_mask[None, None, :, :].expand(input_tensor.shape[0], 1, -1, -1) + if attention_mask is not None: + causal_mask = causal_mask.clone() # copy to contiguous memory for in-place edit + if attention_mask.dim() == 2: + mask_length = attention_mask.shape[-1] + padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :] + padding_mask = padding_mask == 0 + causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill( + padding_mask, min_dtype + ) + + if ( + self.config._attn_implementation == "sdpa" + and attention_mask is 
not None + and attention_mask.device.type == "cuda" + and not output_attentions + ): + # Attend to all tokens in fully masked rows in the causal_mask, for example the relevant first rows when + # using left padding. This is required by F.scaled_dot_product_attention memory-efficient attention path. + # Details: https://github.com/pytorch/pytorch/issues/110213 + causal_mask = AttentionMaskConverter._unmask_unattended(causal_mask, min_dtype) + + return causal_mask + class MistralForCausalLM(MistralPreTrainedModel): _tied_weights_keys = ["lm_head.weight"] @@ -1104,13 +1148,14 @@ class MistralForCausalLM(MistralPreTrainedModel): input_ids: torch.LongTensor = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, + past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None, ) -> Union[Tuple, CausalLMOutputWithPast]: r""" Args: @@ -1155,6 +1200,7 @@ class MistralForCausalLM(MistralPreTrainedModel): output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict, + cache_position=cache_position, ) hidden_states = outputs[0] @@ -1187,14 +1233,27 @@ class MistralForCausalLM(MistralPreTrainedModel): ) def prepare_inputs_for_generation( - self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs + self, + input_ids, + past_key_values=None, + attention_mask=None, + inputs_embeds=None, + cache_position=None, + use_cache=True, + **kwargs, ): # Omit tokens covered by past_key_values + past_length = 0 if past_key_values is not None: if isinstance(past_key_values, Cache): - cache_length = past_key_values.get_seq_length() - past_length = past_key_values.seen_tokens - max_cache_length = past_key_values.get_max_length() + past_length = cache_position[0] if cache_position is not None else past_key_values.get_seq_length() + max_cache_length = ( + torch.tensor(past_key_values.get_max_length(), device=input_ids.device) + if past_key_values.get_max_length() is not None + else None + ) + cache_length = past_length if max_cache_length is None else torch.min(max_cache_length, past_length) + # TODO joao: remove this `else` after `generate` prioritizes `Cache` objects else: cache_length = past_length = past_key_values[0][0].shape[2] max_cache_length = None @@ -1227,17 +1286,33 @@ class MistralForCausalLM(MistralPreTrainedModel): if past_key_values: position_ids = position_ids[:, -input_ids.shape[1] :] + # crop the attention_mask to sliding window size during decode phase if using SlidingWindowCache + if ( + past_length > 0 + and attention_mask is not None + and isinstance(past_key_values, SlidingWindowCache) + and attention_mask.shape[1] > past_key_values.sliding_window_size + ): + attention_mask = attention_mask[:, -past_key_values.sliding_window_size :] + # if `inputs_embeds` are passed, we only want to use them in the 1st generation step if inputs_embeds is not None and past_key_values is None: model_inputs = {"inputs_embeds": inputs_embeds} else: - model_inputs = {"input_ids": input_ids} + model_inputs = {"input_ids": input_ids.contiguous()} + + input_length = position_ids.shape[-1] if position_ids is not None else 
input_ids.shape[-1] + if cache_position is None: + cache_position = torch.arange(past_length, past_length + input_length, device=input_ids.device) + elif use_cache: + cache_position = cache_position[-input_length:] model_inputs.update( { "position_ids": position_ids, + "cache_position": cache_position, "past_key_values": past_key_values, - "use_cache": kwargs.get("use_cache"), + "use_cache": use_cache, "attention_mask": attention_mask, } ) diff --git a/src/transformers/models/mixtral/modeling_mixtral.py b/src/transformers/models/mixtral/modeling_mixtral.py index dfdd114728..70e1746392 100644 --- a/src/transformers/models/mixtral/modeling_mixtral.py +++ b/src/transformers/models/mixtral/modeling_mixtral.py @@ -181,7 +181,8 @@ class MixtralRMSNorm(nn.Module): return self.weight * hidden_states.to(input_dtype) -# Copied from transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding with Mistral->Mixtral +# copied from transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding with Mistral->Mixtral +# TODO @longjie no longer copied from Mistral after static cache class MixtralRotaryEmbedding(nn.Module): def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() @@ -226,7 +227,8 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -# Copied from transformers.models.mistral.modeling_mistral.apply_rotary_pos_emb +# copied from transformers.models.mistral.modeling_mistral.apply_rotary_pos_emb +# TODO @longjie no longer copied from Mistral after static cache def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. @@ -268,7 +270,8 @@ def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim) -# Copied from transformers.models.mistral.modeling_mistral.MistralAttention with Mistral->Mixtral +# copied from transformers.models.mistral.modeling_mistral.MistralAttention with Mistral->Mixtral +# TODO @longjie no longer copied from Mistral after static cache class MixtralAttention(nn.Module): """ Multi-headed attention from 'Attention Is All You Need' paper. Modified to use sliding window attention: Longformer @@ -392,7 +395,8 @@ class MixtralAttention(nn.Module): return attn_output, attn_weights, past_key_value -# Copied from transformers.models.mistral.modeling_mistral.MistralFlashAttention2 with Mistral->Mixtral +# copied from transformers.models.mistral.modeling_mistral.MistralFlashAttention2 with Mistral->Mixtral +# TODO @longjie no longer copied from Mistral after static cache class MixtralFlashAttention2(MixtralAttention): """ Mixtral flash attention module. This module inherits from `MixtralAttention` as the weights of the module stays @@ -679,7 +683,8 @@ class MixtralFlashAttention2(MixtralAttention): ) -# Copied from transformers.models.mistral.modeling_mistral.MistralSdpaAttention with Mistral->Mixtral +# copied from transformers.models.mistral.modeling_mistral.MistralSdpaAttention with Mistral->Mixtral +# TODO @longjie no longer copied from Mistral after static cache class MixtralSdpaAttention(MixtralAttention): """ Mixtral attention module using torch.nn.functional.scaled_dot_product_attention. 
This module inherits from @@ -958,7 +963,7 @@ MIXTRAL_START_DOCSTRING = r""" "The bare Mixtral Model outputting raw hidden-states without any specific head on top.", MIXTRAL_START_DOCSTRING, ) -# Copied from transformers.models.mistral.modeling_mistral.MistralPreTrainedModel with Mistral->Mixtral +# Copied from transformers.models.qwen2.modeling_qwen2.Qwen2PreTrainedModel with Qwen2->Mixtral class MixtralPreTrainedModel(PreTrainedModel): config_class = MixtralConfig base_model_prefix = "model" @@ -1052,7 +1057,8 @@ MIXTRAL_INPUTS_DOCSTRING = r""" "The bare Mixtral Model outputting raw hidden-states without any specific head on top.", MIXTRAL_START_DOCSTRING, ) -# Copied from transformers.models.mistral.modeling_mistral.MistralModel with MISTRAL->MIXTRAL,Mistral->Mixtral +# copied from transformers.models.mistral.modeling_mistral.MistralModel with MISTRAL->MIXTRAL,Mistral->Mixtral +# TODO @longjie no longer copied from Mistral after static cache class MixtralModel(MixtralPreTrainedModel): """ Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`MixtralDecoderLayer`] diff --git a/src/transformers/models/persimmon/modeling_persimmon.py b/src/transformers/models/persimmon/modeling_persimmon.py index 75ba7163ba..ea2bd074ee 100644 --- a/src/transformers/models/persimmon/modeling_persimmon.py +++ b/src/transformers/models/persimmon/modeling_persimmon.py @@ -45,7 +45,7 @@ logger = logging.get_logger(__name__) _CONFIG_FOR_DOC = "PersimmonConfig" -# Copied from transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding with Mistral->Persimmon +# Copied from transformers.models.mixtral.modeling_mixtral.MixtralRotaryEmbedding with Mixtral->Persimmon class PersimmonRotaryEmbedding(nn.Module): def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() @@ -137,7 +137,7 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -# Copied from transformers.models.mistral.modeling_mistral.apply_rotary_pos_emb +# Copied from transformers.models.mixtral.modeling_mixtral.apply_rotary_pos_emb def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. diff --git a/src/transformers/models/phi/modeling_phi.py b/src/transformers/models/phi/modeling_phi.py index 1f82b09a25..1436138f91 100644 --- a/src/transformers/models/phi/modeling_phi.py +++ b/src/transformers/models/phi/modeling_phi.py @@ -76,7 +76,7 @@ def _get_unpad_data(attention_mask): ) -# Copied from transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding with Mistral->Phi +# Copied from transformers.models.mixtral.modeling_mixtral.MixtralRotaryEmbedding with Mixtral->Phi class PhiRotaryEmbedding(nn.Module): def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() @@ -168,7 +168,7 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -# Copied from transformers.models.mistral.modeling_mistral.apply_rotary_pos_emb +# Copied from transformers.models.mixtral.modeling_mixtral.apply_rotary_pos_emb def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. 
diff --git a/src/transformers/models/qwen2/modeling_qwen2.py b/src/transformers/models/qwen2/modeling_qwen2.py index f90d61d6e6..98a9e27c9b 100644 --- a/src/transformers/models/qwen2/modeling_qwen2.py +++ b/src/transformers/models/qwen2/modeling_qwen2.py @@ -94,7 +94,7 @@ class Qwen2RMSNorm(nn.Module): return self.weight * hidden_states.to(input_dtype) -# Copied from transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding with Mistral->Qwen2 +# Copied from transformers.models.mixtral.modeling_mixtral.MixtralRotaryEmbedding with Mixtral->Qwen2 class Qwen2RotaryEmbedding(nn.Module): def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() @@ -139,7 +139,7 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -# Copied from transformers.models.mistral.modeling_mistral.apply_rotary_pos_emb +# Copied from transformers.models.mixtral.modeling_mixtral.apply_rotary_pos_emb def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. @@ -611,7 +611,7 @@ class Qwen2FlashAttention2(Qwen2Attention): ) -# Copied from transformers.models.mistral.modeling_mistral.MistralSdpaAttention with Mistral->Qwen2 +# Copied from transformers.models.mixtral.modeling_mixtral.MixtralSdpaAttention with Mixtral->Qwen2 class Qwen2SdpaAttention(Qwen2Attention): """ Qwen2 attention module using torch.nn.functional.scaled_dot_product_attention. This module inherits from diff --git a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py index 21d8d608ee..0e4b4b75e8 100644 --- a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py +++ b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py @@ -170,7 +170,7 @@ class Qwen2MoeRMSNorm(nn.Module): return self.weight * hidden_states.to(input_dtype) -# Copied from transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding with Mistral->Qwen2Moe +# Copied from transformers.models.mixtral.modeling_mixtral.MixtralRotaryEmbedding with Mixtral->Qwen2Moe class Qwen2MoeRotaryEmbedding(nn.Module): def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() @@ -215,7 +215,7 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -# Copied from transformers.models.mistral.modeling_mistral.apply_rotary_pos_emb +# Copied from transformers.models.mixtral.modeling_mixtral.apply_rotary_pos_emb def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. @@ -689,7 +689,7 @@ class Qwen2MoeFlashAttention2(Qwen2MoeAttention): ) -# Copied from transformers.models.mistral.modeling_mistral.MistralSdpaAttention with Mistral->Qwen2Moe +# Copied from transformers.models.mixtral.modeling_mixtral.MixtralSdpaAttention with Mixtral->Qwen2Moe class Qwen2MoeSdpaAttention(Qwen2MoeAttention): """ Qwen2Moe attention module using torch.nn.functional.scaled_dot_product_attention. 
This module inherits from diff --git a/src/transformers/models/stablelm/modeling_stablelm.py b/src/transformers/models/stablelm/modeling_stablelm.py index e8d07340d3..5b81abc693 100755 --- a/src/transformers/models/stablelm/modeling_stablelm.py +++ b/src/transformers/models/stablelm/modeling_stablelm.py @@ -71,7 +71,7 @@ def _get_unpad_data(attention_mask): ) -# Copied from transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding with Mistral->StableLm +# Copied from transformers.models.mixtral.modeling_mixtral.MixtralRotaryEmbedding with Mixtral->StableLm class StableLmRotaryEmbedding(nn.Module): def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() @@ -163,7 +163,7 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -# Copied from transformers.models.mistral.modeling_mistral.apply_rotary_pos_emb +# Copied from transformers.models.mixtral.modeling_mixtral.apply_rotary_pos_emb def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. diff --git a/src/transformers/models/starcoder2/modeling_starcoder2.py b/src/transformers/models/starcoder2/modeling_starcoder2.py index d808a3fd25..e0abef4b41 100644 --- a/src/transformers/models/starcoder2/modeling_starcoder2.py +++ b/src/transformers/models/starcoder2/modeling_starcoder2.py @@ -74,7 +74,7 @@ def _get_unpad_data(attention_mask): ) -# Copied from transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding with Mistral->Starcoder2 +# Copied from transformers.models.mixtral.modeling_mixtral.MixtralRotaryEmbedding with Mixtral->Starcoder2 class Starcoder2RotaryEmbedding(nn.Module): def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() @@ -119,7 +119,7 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -# Copied from transformers.models.mistral.modeling_mistral.apply_rotary_pos_emb +# Copied from transformers.models.mixtral.modeling_mixtral.apply_rotary_pos_emb def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. @@ -590,7 +590,7 @@ class Starcoder2FlashAttention2(Starcoder2Attention): ) -# Copied from transformers.models.mistral.modeling_mistral.MistralSdpaAttention with Mistral->Starcoder2 +# Copied from transformers.models.mixtral.modeling_mixtral.MixtralSdpaAttention with Mixtral->Starcoder2 class Starcoder2SdpaAttention(Starcoder2Attention): """ Starcoder2 attention module using torch.nn.functional.scaled_dot_product_attention. 
This module inherits from @@ -703,7 +703,7 @@ class Starcoder2DecoderLayer(nn.Module): self.input_layernorm = nn.LayerNorm(config.hidden_size, eps=config.norm_epsilon) self.post_attention_layernorm = nn.LayerNorm(config.hidden_size, eps=config.norm_epsilon) - # Copied from transformers.models.mistral.modeling_mistral.MistralDecoderLayer.forward + # Copied from transformers.models.qwen2.modeling_qwen2.Qwen2DecoderLayer.forward def forward( self, hidden_states: torch.Tensor, @@ -780,7 +780,7 @@ STARCODER2_START_DOCSTRING = r""" "The bare Starcoder2 Model outputting raw hidden-states without any specific head on top.", STARCODER2_START_DOCSTRING, ) -# Copied from transformers.models.mistral.modeling_mistral.MistralPreTrainedModel with Mistral->Starcoder2 +# Copied from transformers.models.qwen2.modeling_qwen2.Qwen2PreTrainedModel with Qwen2->Starcoder2 class Starcoder2PreTrainedModel(PreTrainedModel): config_class = Starcoder2Config base_model_prefix = "model" @@ -1057,7 +1057,7 @@ class Starcoder2Model(Starcoder2PreTrainedModel): ) -# Copied from transformers.models.mistral.modeling_mistral.MistralForCausalLM with MISTRAL->STARCODER2,Mistral-7B-v0.1->starcoder2-7b_16k,Mistral->Starcoder2,mistralai->bigcode +# Copied from transformers.models.qwen2.modeling_qwen2.Qwen2ForCausalLM with QWEN2->STARCODER2,Qwen2->Starcoder2 class Starcoder2ForCausalLM(Starcoder2PreTrainedModel): _tied_weights_keys = ["lm_head.weight"] @@ -1090,6 +1090,7 @@ class Starcoder2ForCausalLM(Starcoder2PreTrainedModel): @add_start_docstrings_to_model_forward(STARCODER2_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) + # Ignore copy def forward( self, input_ids: torch.LongTensor = None, diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py index 10dcb3bf4d..13bdf83c5b 100644 --- a/tests/models/mistral/test_modeling_mistral.py +++ b/tests/models/mistral/test_modeling_mistral.py @@ -12,14 +12,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Mistral model. """ - +"""Testing suite for the PyTorch Mistral model.""" import gc import tempfile import unittest import pytest +from packaging import version from transformers import AutoTokenizer, MistralConfig, is_torch_available, set_seed from transformers.testing_utils import ( @@ -648,6 +648,74 @@ class MistralIntegrationTest(unittest.TestCase): backend_empty_cache(torch_device) gc.collect() + @slow + def test_compile_static_cache(self): + # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2 + # work as intended. See https://github.com/pytorch/pytorch/issues/121943 + if version.parse(torch.__version__) < version.parse("2.3.0"): + self.skipTest("This test requires torch >= 2.3 to run.") + + NUM_TOKENS_TO_GENERATE = 40 + EXPECTED_TEXT_COMPLETION = { + 8: [ + "My favourite condiment is 100% ketchup. I love it on everything. " + "I’m not a big fan of mustard, mayo, or relish. I’m not a fan of pickles" + ], + 7: [ + "My favourite condiment is 100% ketchup. I love it on everything. " + "I’m not a big fan of mustard, mayo, or relish. 
I’m not a fan of pickles" + ], + } + + prompts = ["My favourite condiment is "] + tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", use_fast=False) + tokenizer.pad_token = tokenizer.eos_token + model = MistralForCausalLM.from_pretrained( + "mistralai/Mistral-7B-v0.1", device_map="sequential", torch_dtype=torch.float16 + ) + inputs = tokenizer(prompts, return_tensors="pt", padding=True).to(model.device) + + # Dynamic Cache + generated_ids = model.generate(**inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False) + dynamic_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) + self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], dynamic_text) + + # Static Cache + generated_ids = model.generate( + **inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False, cache_implementation="static" + ) + static_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) + self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], static_text) + + # Sliding Window Cache + generated_ids = model.generate( + **inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False, cache_implementation="sliding_window" + ) + static_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) + self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], static_text) + + # Static Cache + compile + forward_function = model.forward + model.forward = torch.compile(forward_function, mode="reduce-overhead", fullgraph=True) + generated_ids = model.generate( + **inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False, cache_implementation="static" + ) + static_compiled_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) + self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], static_compiled_text) + + # Sliding Window Cache + compile + torch._dynamo.reset() + model.forward = torch.compile(forward_function, mode="reduce-overhead", fullgraph=True) + generated_ids = model.generate( + **inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False, cache_implementation="sliding_window" + ) + static_compiled_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) + self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], static_compiled_text) + + del model + backend_empty_cache(torch_device) + gc.collect() + @slow @require_torch_gpu From 0df888ffb72ea370555efdef45985378d3cc7b2b Mon Sep 17 00:00:00 2001 From: Aaron Jimenez Date: Mon, 20 May 2024 15:48:52 -0800 Subject: [PATCH 15/80] [docs] Spanish translation of model_memory_anatomy.md (#30885) * add model_memory_anatomy to es/_toctree.yml * copy model_memory_anatomy.md to es/ * translate first section * translate doc * chage forward activations * fix sentence and and link to Trainer * fix Trainer link --- docs/source/es/_toctree.yml | 2 + docs/source/es/model_memory_anatomy.md | 239 +++++++++++++++++++++++++ 2 files changed, 241 insertions(+) create mode 100644 docs/source/es/model_memory_anatomy.md diff --git a/docs/source/es/_toctree.yml b/docs/source/es/_toctree.yml index cf1ae39c03..5a20aca2e5 100644 --- a/docs/source/es/_toctree.yml +++ b/docs/source/es/_toctree.yml @@ -102,4 +102,6 @@ title: Perplejidad de los modelos de longitud fija - local: pipeline_webserver title: Flujo de trabajo para la inferencia de los servidores web + - local: model_memory_anatomy + title: Anatomía del entrenamiento de los modelos title: 
Guías conceptuales diff --git a/docs/source/es/model_memory_anatomy.md b/docs/source/es/model_memory_anatomy.md new file mode 100644 index 0000000000..6a220da993 --- /dev/null +++ b/docs/source/es/model_memory_anatomy.md @@ -0,0 +1,239 @@ + + +# Anatomía del entrenamiento de los modelos + +Para entender las técnicas de optimización del rendimiento que se pueden aplicar para mejorar la eficiencia en la velocidad del entrenamiento de los modelos y la utilización de la memoria, es útil familiarizarse con cómo se utiliza la GPU durante el entrenamiento y cómo varía la intensidad de cálculo según la operación realizada. + +Empecemos explorando un ejemplo enfocado en la utilización de la GPU y la ejecución del entrenamiento de un modelo. Para la demostración, necesitaremos instalar algunas bibliotecas: + +```bash +pip install transformers datasets accelerate nvidia-ml-py3 +``` + +La biblioteca `nvidia-ml-py3` nos permite monitorear la utilización de memoria de los modelos desde Python. Es posible que estés familiarizado con el comando `nvidia-smi` en la terminal, esta biblioteca nos permite acceder a la misma información en Python directamente. + +Luego, creamos algunos datos ficticios: IDs de tokens aleatorios entre 100 y 30000 y etiquetas binarias para un clasificador. En total, obtenemos 512 secuencias cada una con longitud 512 y las almacenamos en un [`~datasets.Dataset`] con formato PyTorch. + + +```py +>>> import numpy as np +>>> from datasets import Dataset + + +>>> seq_len, dataset_size = 512, 512 +>>> dummy_data = { +... "input_ids": np.random.randint(100, 30000, (dataset_size, seq_len)), +... "labels": np.random.randint(0, 1, (dataset_size)), +... } +>>> ds = Dataset.from_dict(dummy_data) +>>> ds.set_format("pt") +``` + +Para imprimir estadísticas resumidas para la utilización de la GPU y la ejecución del entrenamiento con [`Trainer`](https://huggingface.co/docs/transformers/en/main_classes/trainer#transformers.Trainer), definimos dos funciones auxiliares: + +```py +>>> from pynvml import * + + +>>> def print_gpu_utilization(): +... nvmlInit() +... handle = nvmlDeviceGetHandleByIndex(0) +... info = nvmlDeviceGetMemoryInfo(handle) +... print(f"GPU memory occupied: {info.used//1024**2} MB.") + + +>>> def print_summary(result): +... print(f"Time: {result.metrics['train_runtime']:.2f}") +... print(f"Samples/second: {result.metrics['train_samples_per_second']:.2f}") +... print_gpu_utilization() +``` + +Comencemos comprobando que la memoria GPU este libre: + +```py +>>> print_gpu_utilization() +GPU memory occupied: 0 MB. +``` + +Parece estar bien: la memoria de la GPU no está ocupada como esperaríamos antes de cargar cualquier modelo. Si no es el caso en tu máquina, asegúrate de detener todos los procesos que estén utilizando la memoria de la GPU. Sin embargo, no toda la memoria libre de la GPU puede ser utilizada por el usuario. Cuando se carga un modelo en la GPU, también se cargan los kernels, lo que puede ocupar 1-2GB de memoria. Para ver cuánta memoria será ocupada por defecto, cargemos un tensor diminuto en la GPU, lo que también desencadena la carga de los kernels. + +```py +>>> import torch + + +>>> torch.ones((1, 1)).to("cuda") +>>> print_gpu_utilization() +GPU memory occupied: 1343 MB. +``` + +Vemos que los kernels solos ocupan 1,3GB de memoria de la GPU. Ahora, veamos cuánto espacio ocupa el modelo. + +## Cargar el Modelo + +Primero, cargamos el modelo `google-bert/bert-large-uncased`. 
Los pesos del modelo son cargados directamente en la GPU para que podamos verificar cuánto espacio ocupan solo los pesos. + +```py +>>> from transformers import AutoModelForSequenceClassification + + +>>> model = AutoModelForSequenceClassification.from_pretrained("google-bert/bert-large-uncased").to("cuda") +>>> print_gpu_utilization() +GPU memory occupied: 2631 MB. +``` + +Podemos ver que los pesos del modelo solos ocupan 1,3 GB de memoria de la GPU. El número exacto depende de la GPU específica que estés utilizando. Ten en cuenta que en GPUs más modernas, un modelo puede ocupar más espacio ya que los pesos se cargan de manera optimizada lo cual acelera el uso del modelo. Ahora también podemos verificar rápidamente si obtenemos el mismo resultado que con la CLI de `nvidia-smi`: + +```bash +nvidia-smi +``` + +```bash +Tue Jan 11 08:58:05 2022 ++-----------------------------------------------------------------------------+ +| NVIDIA-SMI 460.91.03 Driver Version: 460.91.03 CUDA Version: 11.2 | +|-------------------------------+----------------------+----------------------+ +| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC | +| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | +| | | MIG M. | +|===============================+======================+======================| +| 0 Tesla V100-SXM2... On | 00000000:00:04.0 Off | 0 | +| N/A 37C P0 39W / 300W | 2631MiB / 16160MiB | 0% Default | +| | | N/A | ++-------------------------------+----------------------+----------------------+ + ++-----------------------------------------------------------------------------+ +| Processes: | +| GPU GI CI PID Type Process name GPU Memory | +| ID ID Usage | +|=============================================================================| +| 0 N/A N/A 3721 C ...nvs/codeparrot/bin/python 2629MiB | ++-----------------------------------------------------------------------------+ +``` + +Obtenemos el mismo número que antes y también puedes ver que estamos utilizando una GPU V100 con 16GB de memoria. Ahora podemos empezar a entrenar el modelo y ver cómo cambia el consumo de memoria de la GPU. Primero, configuramos algunos argumentos de entrenamiento estándar: + +```py +default_args = { + "output_dir": "tmp", + "eval_strategy": "steps", + "num_train_epochs": 1, + "log_level": "error", + "report_to": "none", +} +``` + + + +Si planeas ejecutar varias pruebas, reinicie el kernel de Python entre cada prueba para borrar correctamente la memoria. + + + +## Utilización de la memoria en el entrenamiento + +Vamos a utilizar el [`Trainer`](https://huggingface.co/docs/transformers/en/main_classes/trainer#transformers.Trainer) y entrenar el modelo sin utilizar ninguna técnica de optimización del rendimiento de la GPU y un tamaño de lote de 4: + +```py +>>> from transformers import TrainingArguments, Trainer, logging + +>>> logging.set_verbosity_error() + + +>>> training_args = TrainingArguments(per_device_train_batch_size=4, **default_args) +>>> trainer = Trainer(model=model, args=training_args, train_dataset=ds) +>>> result = trainer.train() +>>> print_summary(result) +``` + +``` +Time: 57.82 +Samples/second: 8.86 +GPU memory occupied: 14949 MB. +``` + +Vemos que incluso un tamaño de lote relativamente pequeño casi llena toda la memoria de nuestra GPU. Sin embargo, un tamaño de lote más grande a menudo puede resultar en una convergencia del modelo más rápida o un mejor rendimiento final. 
Así que idealmente queremos ajustar el tamaño del lote a las necesidades del modelo y no a las limitaciones de la GPU. Lo interesante es que utilizamos mucha más memoria que el tamaño del modelo. +Para entender un poco mejor por qué es el caso, echemos un vistazo a las operaciones y necesidades de memoria de un modelo. + +## Anatomía de las Operaciones del Modelo + +La arquitectura de los transformers incluye 3 grupos principales de operaciones agrupadas a continuación por intensidad de cálculo. + +1. **Contracciones de Tensores** + + Las capas lineales y componentes de la Atención Multi-Head realizan **multiplicaciones matriciales por lotes**. Estas operaciones son la parte más intensiva en cálculo del entrenamiento de los transformers. + +2. **Normalizaciones Estadísticas** + + Softmax y normalización de capas son menos intensivas en cálculo que las contracciones de tensores, e implican una o más **operaciones de reducción**, cuyo resultado se aplica luego mediante un mapa. + +3. **Operadores por Elemento** + + Estos son los operadores restantes: **sesgos, dropout, activaciones y conexiones residuales**. Estas son las operaciones menos intensivas en cálculo. + +Este conocimiento puede ser útil al analizar cuellos de botella de rendimiento. + +Este resumen se deriva de [Data Movement Is All You Need: A Case Study on Optimizing Transformers 2020](https://arxiv.org/abs/2007.00072) + + +## Anatomía de la Memoria del Modelo + +Hemos visto que al entrenar un modelo se utiliza mucha más memoria que solo poner el modelo en la GPU. Esto se debe a que hay muchos componentes durante el entrenamiento que utilizan memoria de la GPU. Los componentes en memoria de la GPU son los siguientes: + +1. pesos del modelo +2. estados del optimizador +3. gradientes +4. activaciones hacia adelante guardadas para el cálculo del gradiente +5. buffers temporales +6. memoria específica de funcionalidad + +Un modelo típico entrenado en precisión mixta con AdamW requiere 18 bytes por parámetro del modelo más memoria de activación. Para la inferencia no hay estados del optimizador ni gradientes, por lo que podemos restarlos. Y así terminamos con 6 bytes por parámetro del modelo para la inferencia en precisión mixta, más la memoria de activación. + +Veámoslo a detalle: + +**Pesos del Modelo:** + +- 4 bytes por número de parámetros para entrenamiento en fp32 +- 6 bytes por número de parámetros para entrenamiento en precisión mixta (mantiene un modelo en fp32 y uno en fp16 en memoria) + +**Estados del Optimizador:** + +- 8 bytes por número de parámetros para un AdamW normal (mantiene 2 estados) +- 2 bytes por número de parámetros para optimizadores de 8 bits como [bitsandbytes](https://github.com/TimDettmers/bitsandbytes) +- 4 bytes por número de parámetros para optimizadores como SGD con momentum (mantiene solo 1 estado) + +**Gradientes** + +- 4 bytes por número de parámetros para entrenamiento en fp32 o precisión mixta (los gradientes siempre se mantienen en fp32) + +**Activaciones hacia Adelante** + +- El tamaño depende de muchos factores, los principales siendo la longitud de la secuencia, el tamaño oculto y el tamaño de lote. + +Hay entradas y salidas que se pasan y se devuelven por las funciones hacia adelante y hacia atrás, y las activaciones hacia adelante (*forward activations*) guardadas para el cálculo del gradiente. 
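+
+Como ilustración aproximada del desglose anterior (ejemplo añadido solo a modo de referencia: la función `estimar_memoria_entrenamiento_gb` es ilustrativa, reutiliza el `model` cargado más arriba y no incluye las activaciones ni los buffers temporales):
+
+```py
+>>> def estimar_memoria_entrenamiento_gb(num_parametros):
+...     # Precisión mixta con AdamW: 6 (pesos) + 8 (optimizador) + 4 (gradientes) = 18 bytes por parámetro
+...     return 18 * num_parametros / 1024**3
+
+
+>>> num_parametros = sum(p.numel() for p in model.parameters())
+>>> print(f"Memoria de entrenamiento estimada (sin activaciones): {estimar_memoria_entrenamiento_gb(num_parametros):.1f} GB")
+```
+
+Para la inferencia en precisión mixta bastaría con sustituir los 18 bytes por los 6 bytes por parámetro mencionados antes.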
+ +**Memoria Temporal** + +Además, hay todas clases de variables temporales que se liberan una vez que se completa el cálculo, pero en el momento podrían requerir memoria adicional y podrían provocar un error de memoria insuficiente. Por lo tanto, al codificar es crucial pensar estratégicamente sobre tales variables temporales y a veces liberarlas explícitamente tan pronto como ya no se necesitan. + +**Memoria Específica de Funcionalidad** + +Entonces, su software podría tener necesidades especiales de memoria. Por ejemplo, al generar texto mediante la búsqueda por haz, el software necesita mantener múltiples copias de las entradas y salidas. + +**Velocidad de Ejecución `forward` vs `backward`** + +Para convoluciones y capas lineales, hay 2x flops en la ejecución hacia atrás (`backward`) en comparación con la ejecución hacia adelante (`forward`), lo que generalmente se traduce en ~2x más lento (a veces más, porque los tamaños en la ejecución hacia atrás tienden a ser más complejos). Las activaciones suelen ser limitadas por ancho de banda, y es típico que una activación tenga que leer más datos en la ejecución hacia atrás que en la ejecución hacia adelante (por ejemplo, la activación hacia adelante lee una vez, escribe una vez, la activación hacia atrás lee dos veces, gradOutput y salida de la ejecución hacia adelante, y escribe una vez, gradInput). + +Como puedes ver, hay potencialmente unos pocos lugares donde podríamos ahorrar memoria de la GPU o acelerar operaciones. Ahora que entiendes qué afecta la utilización de la GPU y la velocidad de cálculo, consulta la página de documentación [Métodos y herramientas para entrenamiento eficiente en una sola GPU](https://huggingface.co/docs/transformers/perf_train_gpu_one) para aprender sobre técnicas de optimización del rendimiento. From c876d12127272ce2886ce51399928c3343d475b9 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Tue, 21 May 2024 09:14:14 +0200 Subject: [PATCH 16/80] FIX / TST: Fix expected results on Mistral slow test (A10) (#30909) Update test_modeling_mistral.py --- tests/models/mistral/test_modeling_mistral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py index 13bdf83c5b..dfa0646f8f 100644 --- a/tests/models/mistral/test_modeling_mistral.py +++ b/tests/models/mistral/test_modeling_mistral.py @@ -627,7 +627,7 @@ class MistralIntegrationTest(unittest.TestCase): def test_speculative_generation(self): EXPECTED_TEXT_COMPLETION = { 7: "My favourite condiment is 100% Sriracha. I love the heat, the tang and the fact costs", - 8: "My favourite condiment is 100% Sriracha. I love the heat, the sweetness, the tang", + 8: "My favourite condiment is 100% ketchup. I love it on everything. 
I’m not a big", } prompt = "My favourite condiment is " tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", use_fast=False) From 8871b261500b3b81b08f58972edbfba8dd78e58c Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Tue, 21 May 2024 10:16:37 +0200 Subject: [PATCH 17/80] FEAT / Trainer: LOMO optimizer support (#30178) * add V1 - adalomo not working yet * add todo docs + refactor from comments * adjust LR * add docs * add more elaborated test * Apply suggestions from code review Co-authored-by: Zach Mueller * fix * push * add accelerate check * fix DDP case * Apply suggestions from code review Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * fix * init kwargs * safely add attribute * revert to enum logic * Update src/transformers/trainer.py --------- Co-authored-by: Zach Mueller Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --- docs/source/en/trainer.md | 50 ++++++++++++++++++++++++++ src/transformers/testing_utils.py | 9 +++++ src/transformers/trainer.py | 42 +++++++++++++++++++--- src/transformers/training_args.py | 2 ++ src/transformers/utils/__init__.py | 1 + src/transformers/utils/import_utils.py | 5 +++ tests/trainer/test_trainer.py | 44 +++++++++++++++++++++++ 7 files changed, 149 insertions(+), 4 deletions(-) diff --git a/docs/source/en/trainer.md b/docs/source/en/trainer.md index b69bebd6ea..b71f42aa14 100644 --- a/docs/source/en/trainer.md +++ b/docs/source/en/trainer.md @@ -382,6 +382,56 @@ trainer.train() Note layerwise optimization is a bit experimental and does not support DDP (Distributed Data Parallel), thus you can run the training script only on a single GPU. Please see [this appropriate section](https://github.com/jiaweizzhao/GaLore?tab=readme-ov-file#train-7b-model-with-a-single-gpu-with-24gb-memory) for more details. Other features such as gradient clipping, DeepSpeed, etc might not be supported out of the box. Please [raise an issue on GitHub](https://github.com/huggingface/transformers/issues) if you encounter such issue. +## LOMO optimizer + +The LOMO optimizers have been introduced in [Full Parameter Fine-Tuning for Large Language Models with Limited Resources](https://hf.co/papers/2306.09782) and [AdaLomo: Low-memory Optimization with Adaptive Learning Rate](https://hf.co/papers/2310.10195). +They both consist of an efficient full-parameter fine-tuning method. These optimizers fuse the gradient computation and the parameter update in one step to reduce memory usage. Supported optimizers for LOMO are `"lomo"` and `"adalomo"`. First either install LOMO from pypi `pip install lomo-optim` or install it from source with `pip install git+https://github.com/OpenLMLab/LOMO.git`. + + + +According to the authors, it is recommended to use `AdaLomo` without `grad_norm` to get better performance and higher throughput. 
+ + + +Below is a simple script to demonstrate how to fine-tune [google/gemma-2b](https://huggingface.co/google/gemma-2b) on IMDB dataset in full precision: + +```python +import torch +import datasets +from transformers import TrainingArguments, AutoTokenizer, AutoModelForCausalLM +import trl + +train_dataset = datasets.load_dataset('imdb', split='train') + +args = TrainingArguments( + output_dir="./test-lomo", + max_steps=1000, + per_device_train_batch_size=4, + optim="adalomo", + gradient_checkpointing=True, + logging_strategy="steps", + logging_steps=1, + learning_rate=2e-6, + save_strategy="no", + run_name="lomo-imdb", +) + +model_id = "google/gemma-2b" + +tokenizer = AutoTokenizer.from_pretrained(model_id) +model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True).to(0) + +trainer = trl.SFTTrainer( + model=model, + args=args, + train_dataset=train_dataset, + dataset_text_field='text', + max_seq_length=1024, +) + +trainer.train() +``` + ## Accelerate and Trainer The [`Trainer`] class is powered by [Accelerate](https://hf.co/docs/accelerate), a library for easily training PyTorch models in distributed environments with support for integrations such as [FullyShardedDataParallel (FSDP)](https://pytorch.org/blog/introducing-pytorch-fully-sharded-data-parallel-api/) and [DeepSpeed](https://www.deepspeed.ai/). diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index e17c5d295f..0abd804a36 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -82,6 +82,7 @@ from .utils import ( is_keras_nlp_available, is_levenshtein_available, is_librosa_available, + is_lomo_available, is_natten_available, is_nltk_available, is_onnx_available, @@ -338,6 +339,14 @@ def require_galore_torch(test_case): return unittest.skipUnless(is_galore_torch_available(), "test requires GaLore")(test_case) +def require_lomo(test_case): + """ + Decorator marking a test that requires LOMO. These tests are skipped when LOMO-optim isn't installed. + https://github.com/OpenLMLab/LOMO + """ + return unittest.skipUnless(is_lomo_available(), "test requires LOMO")(test_case) + + def require_cv2(test_case): """ Decorator marking a test that requires OpenCV. diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index afe414fbc8..20c709b248 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -153,6 +153,7 @@ from .utils import ( is_galore_torch_available, is_in_notebook, is_ipex_available, + is_lomo_available, is_peft_available, is_safetensors_available, is_sagemaker_dp_enabled, @@ -1059,12 +1060,18 @@ class Trainer: if "params" in optimizer_kwargs: optimizer_grouped_parameters = optimizer_kwargs.pop("params") + # Overwrite `model` in case it's created by `get_optimizer_cls_and_kwargs` + # e.g. for LOMO optimizer. + if "model" in optimizer_kwargs: + optimizer_grouped_parameters = optimizer_kwargs.pop("model") + # For layer-wise dummy optimizers we overwrite optimizer_grouped_parameters with `optimizer_dict` # to avoid arguments conflicts. 
if "optimizer_dict" in optimizer_kwargs: optimizer_grouped_parameters = optimizer_kwargs.pop("optimizer_dict") self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs) + if optimizer_cls.__name__ == "Adam8bit": import bitsandbytes @@ -1382,6 +1389,26 @@ class Trainer: if args.optim == OptimizerNames.GALORE_ADAFACTOR: optimizer_kwargs.update({"scale_parameter": False, "relative_step": False}) + elif args.optim in [OptimizerNames.LOMO, OptimizerNames.ADALOMO]: + if not is_lomo_available(): + raise ImportError( + "You need to install `lomo_optim` in order to use LOMO optimizers" + " install it with `pip install lomo-optim`" + ) + if not is_accelerate_available("0.30.0"): + raise ImportError("You need to have `accelerate>=0.30.0` to be able to use LOMO optimizers") + + if model is None: + raise ValueError("You need to pass a `model` in order to correctly initialize a LOMO optimizer.") + + from lomo_optim import AdaLomo, Lomo + + if "ada" in args.optim: + optimizer_cls = AdaLomo + else: + optimizer_cls = Lomo + + optimizer_kwargs.update({"model": model}) else: raise ValueError(f"Trainer cannot instantiate unsupported optimizer: {args.optim}") return optimizer_cls, optimizer_kwargs @@ -2045,6 +2072,9 @@ class Trainer: model, self.optimizer, self.lr_scheduler = self.accelerator.prepare( self.model, self.optimizer, self.lr_scheduler ) + elif self.args.optim in [OptimizerNames.LOMO, OptimizerNames.ADALOMO]: + # In this case we are in DDP + LOMO, which should be supported + self.optimizer = self.accelerator.prepare(self.optimizer) if self.is_fsdp_enabled: self.model = self.model_wrapped = model @@ -2143,7 +2173,6 @@ class Trainer: self._globalstep_last_logged = self.state.global_step model.zero_grad() grad_norm: Optional[float] = None - self.control = self.callback_handler.on_train_begin(args, self.state, self.control) total_batched_samples = 0 @@ -2275,8 +2304,8 @@ class Trainer: else: grad_norm = _grad_norm - # Optimizer step self.optimizer.step() + optimizer_was_run = not self.accelerator.optimizer_step_was_skipped if optimizer_was_run: # Delay optimizer scheduling until metrics are generated @@ -3229,7 +3258,6 @@ class Trainer: """ model.train() inputs = self._prepare_inputs(inputs) - if is_sagemaker_mp_enabled(): loss_mb = smp_forward_backward(model, inputs, self.args.gradient_accumulation_steps) return loss_mb.reduce_mean().detach().to(self.args.device) @@ -3240,6 +3268,12 @@ class Trainer: del inputs torch.cuda.empty_cache() + kwargs = {} + + # For LOMO optimizers you need to explicitly use the learnign rate + if self.args.optim in [OptimizerNames.LOMO, OptimizerNames.ADALOMO]: + kwargs["learning_rate"] = self._get_learning_rate() + if self.args.n_gpu > 1: loss = loss.mean() # mean() to average on multi-gpu parallel training @@ -3247,7 +3281,7 @@ class Trainer: with amp.scale_loss(loss, self.optimizer) as scaled_loss: scaled_loss.backward() else: - self.accelerator.backward(loss) + self.accelerator.backward(loss, **kwargs) return loss.detach() / self.args.gradient_accumulation_steps diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 08d9000cb6..24bb3c7d02 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -171,6 +171,8 @@ class OptimizerNames(ExplicitEnum): GALORE_ADAMW_LAYERWISE = "galore_adamw_layerwise" GALORE_ADAMW_8BIT_LAYERWISE = "galore_adamw_8bit_layerwise" GALORE_ADAFACTOR_LAYERWISE = "galore_adafactor_layerwise" + LOMO = "lomo" + ADALOMO = "adalomo" # Sometimes users will pass in a 
`str` repr of a dict in the CLI diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index 82d0b5001c..9f9d5ceb8d 100755 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -141,6 +141,7 @@ from .import_utils import ( is_keras_nlp_available, is_levenshtein_available, is_librosa_available, + is_lomo_available, is_mlx_available, is_natten_available, is_ninja_available, diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index b941080b97..e58754db0d 100755 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -99,6 +99,7 @@ _av_available = importlib.util.find_spec("av") is not None _bitsandbytes_available = _is_package_available("bitsandbytes") _eetq_available = _is_package_available("eetq") _galore_torch_available = _is_package_available("galore_torch") +_lomo_available = _is_package_available("lomo_optim") # `importlib.metadata.version` doesn't work with `bs4` but `beautifulsoup4`. For `importlib.util.find_spec`, reversed. _bs4_available = importlib.util.find_spec("bs4") is not None _coloredlogs_available = _is_package_available("coloredlogs") @@ -328,6 +329,10 @@ def is_galore_torch_available(): return _galore_torch_available +def is_lomo_available(): + return _lomo_available + + def is_pyctcdecode_available(): return _pyctcdecode_available diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 1711f600ce..2d76264341 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -63,6 +63,7 @@ from transformers.testing_utils import ( require_deepspeed, require_galore_torch, require_intel_extension_for_pytorch, + require_lomo, require_optuna, require_peft, require_ray, @@ -1229,6 +1230,49 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon): trainer.train() trainer.evaluate() + @require_lomo + @require_torch_gpu + def test_lomo(self): + config = LlamaConfig(vocab_size=100, hidden_size=32, num_hidden_layers=3, num_attention_heads=4) + tiny_llama = LlamaForCausalLM(config) + + previous_params = {n: p.clone() for n, p in tiny_llama.named_parameters()} + + x = torch.randint(0, 100, (128,)) + train_dataset = RepeatDataset(x) + + with tempfile.TemporaryDirectory() as tmpdir: + # Trainer without inf/nan filter + args = TrainingArguments(tmpdir, learning_rate=1e-2, logging_steps=5, optim="lomo", max_steps=20) + trainer = Trainer(tiny_llama, args, train_dataset=train_dataset) + + # Check this works + _ = trainer.train() + + for name, param in tiny_llama.named_parameters(): + self.assertFalse(torch.allclose(param, previous_params[name].to(param.device), rtol=1e-12, atol=1e-12)) + + @require_lomo + @require_torch_gpu + def test_adalomo(self): + config = LlamaConfig(vocab_size=100, hidden_size=32, num_hidden_layers=3, num_attention_heads=4) + tiny_llama = LlamaForCausalLM(config) + x = torch.randint(0, 100, (128,)) + train_dataset = RepeatDataset(x) + + with tempfile.TemporaryDirectory() as tmpdir: + # Trainer without inf/nan filter + args = TrainingArguments( + tmpdir, + learning_rate=1e-9, + logging_steps=5, + optim="adalomo", + ) + trainer = Trainer(tiny_llama, args, train_dataset=train_dataset) + + # Check this works + _ = trainer.train() + def test_galore_matched_modules(self): regex_patterns = [r".*.attn.*", r".*.mlp.*"] From d502bd64756535ff6af43cbc5a15aa5da7f52483 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 May 2024 
09:41:29 +0100 Subject: [PATCH 18/80] Bump requests from 2.31.0 to 2.32.0 in /examples/research_projects/decision_transformer (#30925) --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .../research_projects/decision_transformer/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/research_projects/decision_transformer/requirements.txt b/examples/research_projects/decision_transformer/requirements.txt index fc23114851..0ae3469949 100644 --- a/examples/research_projects/decision_transformer/requirements.txt +++ b/examples/research_projects/decision_transformer/requirements.txt @@ -177,7 +177,7 @@ PyYAML==6.0 ray>2.6.3 redis==4.5.4 regex==2022.3.15 -requests==2.31.0 +requests==2.32.0 requests-oauthlib==1.3.1 resampy==0.2.2 responses==0.18.0 From a755745546779ae5c42510bc02a859bdac82b3b7 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Tue, 21 May 2024 17:43:22 +0800 Subject: [PATCH 19/80] PaliGemma - fix processor with no input text (#30916) Update processing_paligemma.py --- src/transformers/models/paligemma/processing_paligemma.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transformers/models/paligemma/processing_paligemma.py b/src/transformers/models/paligemma/processing_paligemma.py index 76a9aaf705..258954d856 100644 --- a/src/transformers/models/paligemma/processing_paligemma.py +++ b/src/transformers/models/paligemma/processing_paligemma.py @@ -177,6 +177,7 @@ class PaliGemmaProcessor(ProcessorMixin): logger.warning_once( "You are using PaliGemma without a text prefix. It will perform as a picture-captioning model." ) + text = "" if isinstance(text, List) and isinstance(images, List): if len(images) < len(text): From 7a4792e6b3feba1cb379478449483754a70257ec Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 21 May 2024 17:16:07 +0530 Subject: [PATCH 20/80] CI: AMD MI300 tests fix (#30797) * add fix * update import * updated dicts and comments * remove prints * Update testing_utils.py --- src/transformers/testing_utils.py | 9 ++++ tests/models/bark/test_modeling_bark.py | 7 ++- tests/models/gemma/test_modeling_gemma.py | 45 +++++++++++++++++++ tests/models/kosmos2/test_modeling_kosmos2.py | 8 ++-- tests/models/llama/test_modeling_llama.py | 17 ++++++- tests/models/mistral/test_modeling_mistral.py | 11 ++++- tests/models/mixtral/test_modeling_mixtral.py | 21 +++++++++ .../perceiver/test_modeling_perceiver.py | 12 ++++- tests/trainer/test_trainer.py | 5 ++- tests/trainer/test_trainer_seq2seq.py | 1 - 10 files changed, 126 insertions(+), 10 deletions(-) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 0abd804a36..4c6940564e 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -166,6 +166,15 @@ ENDPOINT_STAGING = "https://hub-ci.huggingface.co" # Not critical, only usable on the sandboxed CI instance. 
TOKEN = "hf_94wBhPGp6KrrTH3KDchhKpRxZwd6dmHWLL" +if is_torch_available(): + import torch + + IS_ROCM_SYSTEM = torch.version.hip is not None + IS_CUDA_SYSTEM = torch.version.cuda is not None +else: + IS_ROCM_SYSTEM = False + IS_CUDA_SYSTEM = False + def parse_flag_from_env(key, default=False): try: diff --git a/tests/models/bark/test_modeling_bark.py b/tests/models/bark/test_modeling_bark.py index 476031068f..bfc6a4dadf 100644 --- a/tests/models/bark/test_modeling_bark.py +++ b/tests/models/bark/test_modeling_bark.py @@ -1327,4 +1327,9 @@ class BarkModelIntegrationTests(unittest.TestCase): output_with_offload = self.model.generate(**input_ids, do_sample=False, temperature=1.0) # checks if same output - self.assertListEqual(output_with_no_offload.tolist(), output_with_offload.tolist()) + self.assertListAlmostEqual(output_with_no_offload.squeeze().tolist(), output_with_offload.squeeze().tolist()) + + def assertListAlmostEqual(self, list1, list2, tol=1e-6): + self.assertEqual(len(list1), len(list2)) + for a, b in zip(list1, list2): + self.assertAlmostEqual(a, b, delta=tol) diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py index 8281f59993..fa0db9ee78 100644 --- a/tests/models/gemma/test_modeling_gemma.py +++ b/tests/models/gemma/test_modeling_gemma.py @@ -601,6 +601,11 @@ class GemmaIntegrationTest(unittest.TestCase): @require_read_token def test_model_2b_bf16(self): model_id = "google/gemma-2b" + + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. EXPECTED_TEXTS = { 7: [ "Hello I am doing a project on the 1990s and I need to know what the most popular music", @@ -610,6 +615,10 @@ class GemmaIntegrationTest(unittest.TestCase): "Hello I am doing a project on the 1990s and I need to know what the most popular music", "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", ], + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], } model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to( @@ -627,6 +636,11 @@ class GemmaIntegrationTest(unittest.TestCase): @require_read_token def test_model_2b_eager(self): model_id = "google/gemma-2b" + + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. 
EXPECTED_TEXTS = { 7: [ "Hello I am doing a project on the 1990s and I am looking for some information on the ", @@ -636,6 +650,10 @@ class GemmaIntegrationTest(unittest.TestCase): "Hello I am doing a project on the 1990s and I need to know what the most popular music", "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", ], + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], } model = AutoModelForCausalLM.from_pretrained( @@ -655,6 +673,11 @@ class GemmaIntegrationTest(unittest.TestCase): @require_read_token def test_model_2b_sdpa(self): model_id = "google/gemma-2b" + + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. EXPECTED_TEXTS = { 7: [ "Hello I am doing a project on the 1990s and I need to know what the most popular music", @@ -664,6 +687,10 @@ class GemmaIntegrationTest(unittest.TestCase): "Hello I am doing a project on the 1990s and I need to know what the most popular music", "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", ], + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], } model = AutoModelForCausalLM.from_pretrained( @@ -763,6 +790,11 @@ class GemmaIntegrationTest(unittest.TestCase): @require_read_token def test_model_7b_bf16(self): model_id = "google/gemma-7b" + + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. EXPECTED_TEXTS = { 7: [ """Hello I am doing a project on a 1991 240sx and I am trying to find""", @@ -772,6 +804,10 @@ class GemmaIntegrationTest(unittest.TestCase): "Hello I am doing a project for my school and I am trying to make a program that will read a .txt file", "Hi today I am going to show you how to make a very simple and easy to make a very simple and", ], + 9: [ + "Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees", + "Hi today I am going to show you how to make a very simple and easy to make DIY light up sign", + ], } model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to( @@ -845,6 +881,11 @@ class GemmaIntegrationTest(unittest.TestCase): NUM_TOKENS_TO_GENERATE = 40 # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test # was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs. + # + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. EXPECTED_TEXT_COMPLETION = { 8: [ "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. 
I have looked on the internet and I have found", @@ -854,6 +895,10 @@ class GemmaIntegrationTest(unittest.TestCase): "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found", "Hi today\nI have a problem with my 2007 1.9 tdi 105bhp.\nI have a problem with the engine management light on.\nI have checked the", ], + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found", + "Hi today\nI have a problem with my 2007 1.9 tdi 105bhp.\nI have a problem with the engine management light on.\nI have checked the", + ], } prompts = ["Hello I am doing", "Hi today"] diff --git a/tests/models/kosmos2/test_modeling_kosmos2.py b/tests/models/kosmos2/test_modeling_kosmos2.py index 9bc95b8bd4..ca944d0df0 100644 --- a/tests/models/kosmos2/test_modeling_kosmos2.py +++ b/tests/models/kosmos2/test_modeling_kosmos2.py @@ -26,7 +26,7 @@ import requests from transformers import AutoModelForVision2Seq, AutoProcessor, Kosmos2Config from transformers.models.kosmos2.configuration_kosmos2 import Kosmos2TextConfig, Kosmos2VisionConfig -from transformers.testing_utils import require_torch, require_vision, slow, torch_device +from transformers.testing_utils import IS_ROCM_SYSTEM, require_torch, require_vision, slow, torch_device from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -562,6 +562,8 @@ class Kosmos2ModelIntegrationTest(unittest.TestCase): processed_text = processed_text[0] final_text, entities = final_text_with_entities[0] + atol = 1e-4 if IS_ROCM_SYSTEM else 1e-5 + np.testing.assert_allclose( torch.concat(scores[1:4])[:3, :3].to("cpu").numpy(), np.array( @@ -571,7 +573,7 @@ class Kosmos2ModelIntegrationTest(unittest.TestCase): [-0.9352350831031799, -4.688288688659668, 6.240612983703613], ] ), - atol=1e-5, + atol=atol, ) np.testing.assert_allclose( torch.concat(scores[-3:])[-3:, -3:].to("cpu").numpy(), @@ -629,7 +631,7 @@ class Kosmos2ModelIntegrationTest(unittest.TestCase): [-0.7624598741531372, -4.771658897399902, 6.576295852661133], ] ), - atol=1e-5, + atol=atol, ) np.testing.assert_allclose( torch.concat(scores[-3:])[-3:, -3:].to("cpu").numpy(), diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index 58269d62e0..61b33b3ec9 100644 --- a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -715,6 +715,11 @@ class LlamaIntegrationTest(unittest.TestCase): NUM_TOKENS_TO_GENERATE = 40 # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test # was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs. + # + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. EXPECTED_TEXT_COMPLETION = { 8: [ "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial " @@ -730,7 +735,15 @@ class LlamaIntegrationTest(unittest.TestCase): "My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, " "and even on a good old fashioned cheeseburger. I love it on everything. 
I love it so", ], + 9: [ + "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial" + " reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " + "theory of relativ", + "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs," + " my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", + ], } + expected_text_completion_idx = 8 prompts = [ "Simply put, the theory of relativity states that ", @@ -745,7 +758,9 @@ class LlamaIntegrationTest(unittest.TestCase): # Dynamic Cache generated_ids = model.generate(**inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False) dynamic_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) - self.assertEqual(EXPECTED_TEXT_COMPLETION[8], dynamic_text) # Both GPU architectures have the same output + self.assertEqual( + EXPECTED_TEXT_COMPLETION[expected_text_completion_idx], dynamic_text + ) # Both GPU architectures have the same output # Static Cache generated_ids = model.generate( diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py index dfa0646f8f..cead890586 100644 --- a/tests/models/mistral/test_modeling_mistral.py +++ b/tests/models/mistral/test_modeling_mistral.py @@ -520,12 +520,16 @@ class MistralIntegrationTest(unittest.TestCase): EXPECTED_MEAN = torch.tensor([[-2.5548, -2.5737, -3.0600, -2.5906, -2.8478, -2.8118, -2.9325, -2.7694]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in output. EXPECTED_SLICE = { 7: torch.tensor([-5.8781, -5.8616, -0.1052, -4.7200, -5.8781, -5.8774, -5.8773, -5.8777, -5.8781, -5.8780, -5.8781, -5.8779, -1.0787, 1.7583, -5.8779, -5.8780, -5.8783, -5.8778, -5.8776, -5.8781, -5.8784, -5.8778, -5.8778, -5.8777, -5.8779, -5.8778, -5.8776, -5.8780, -5.8779, -5.8781]), 8: torch.tensor([-5.8711, -5.8555, -0.1050, -4.7148, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -1.0781, 1.7568, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711]), + 9: torch.tensor([-5.8750, -5.8594, -0.1047, -4.7188, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -1.0781, 1.7578, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750]), } # fmt: skip - print(out[0, 0, :30]) torch.testing.assert_close( out[0, 0, :30], EXPECTED_SLICE[self.cuda_compute_capability_major_version], atol=1e-4, rtol=1e-4 ) @@ -625,9 +629,14 @@ class MistralIntegrationTest(unittest.TestCase): @slow def test_speculative_generation(self): + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. EXPECTED_TEXT_COMPLETION = { 7: "My favourite condiment is 100% Sriracha. I love the heat, the tang and the fact costs", 8: "My favourite condiment is 100% ketchup. I love it on everything. I’m not a big", + 9: "My favourite condiment is 100% ketchup. I love it on everything. 
I’m not a big", } prompt = "My favourite condiment is " tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", use_fast=False) diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py index 0972207fce..b60062004a 100644 --- a/tests/models/mixtral/test_modeling_mixtral.py +++ b/tests/models/mixtral/test_modeling_mixtral.py @@ -553,6 +553,10 @@ class MixtralIntegrationTest(unittest.TestCase): ) # TODO: might need to tweak it in case the logits do not match on our daily runners # these logits have been obtained with the original megablocks impelmentation. + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in output. EXPECTED_LOGITS = { 7: torch.Tensor([[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]).to( torch_device @@ -560,6 +564,9 @@ class MixtralIntegrationTest(unittest.TestCase): 8: torch.Tensor([[0.1631, 0.1621, 0.6094], [-0.8906, -0.1621, -0.6094], [0.1572, 0.1270, 0.7227]]).to( torch_device ), + 9: torch.Tensor([[0.1641, 0.1621, 0.6094], [-0.8906, -0.1631, -0.6094], [0.1572, 0.1260, 0.7227]]).to( + torch_device + ), } with torch.no_grad(): logits = model(dummy_input).logits @@ -583,6 +590,11 @@ class MixtralIntegrationTest(unittest.TestCase): ) # TODO: might need to tweak it in case the logits do not match on our daily runners + # + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. 
EXPECTED_LOGITS_LEFT = { 7: torch.Tensor( [[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]], @@ -590,6 +602,9 @@ class MixtralIntegrationTest(unittest.TestCase): 8: torch.Tensor([[0.1914, 0.0508, 0.7188], [0.1953, 0.0510, 0.7227], [0.1973, 0.0562, 0.7148]]).to( torch_device ), + 9: torch.Tensor([[0.1904, 0.0513, 0.7227], [0.1943, 0.0518, 0.7227], [0.1982, 0.0557, 0.7148]]).to( + torch_device + ), } EXPECTED_LOGITS_LEFT_UNPADDED = { @@ -599,6 +614,9 @@ class MixtralIntegrationTest(unittest.TestCase): 8: torch.Tensor([[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]]).to( torch_device ), + 9: torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]]).to( + torch_device + ), } EXPECTED_LOGITS_RIGHT_UNPADDED = { @@ -608,6 +626,9 @@ class MixtralIntegrationTest(unittest.TestCase): 8: torch.Tensor([[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to( torch_device ), + 9: torch.Tensor([[0.2197, 0.1250, -0.1611], [-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]]).to( + torch_device + ), } with torch.no_grad(): diff --git a/tests/models/perceiver/test_modeling_perceiver.py b/tests/models/perceiver/test_modeling_perceiver.py index fbd237bc10..86bbc7b49d 100644 --- a/tests/models/perceiver/test_modeling_perceiver.py +++ b/tests/models/perceiver/test_modeling_perceiver.py @@ -26,7 +26,14 @@ import numpy as np from datasets import load_dataset from transformers import PerceiverConfig -from transformers.testing_utils import require_torch, require_torch_multi_gpu, require_vision, slow, torch_device +from transformers.testing_utils import ( + IS_ROCM_SYSTEM, + require_torch, + require_torch_multi_gpu, + require_vision, + slow, + torch_device, +) from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -930,7 +937,8 @@ class PerceiverModelIntegrationTest(unittest.TestCase): expected_slice = torch.tensor([-1.1652, -0.1992, -0.7520], device=torch_device) - self.assertTrue(torch.allclose(logits[0, :3], expected_slice, atol=1e-4)) + atol = 1e-3 if IS_ROCM_SYSTEM else 1e-4 + self.assertTrue(torch.allclose(logits[0, :3], expected_slice, atol=atol)) @slow def test_inference_image_classification_fourier(self): diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 2d76264341..fc7c3dc783 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -2551,7 +2551,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon): ) eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev") - training_args = TrainingArguments(output_dir="./examples", use_cpu=True) + training_args = TrainingArguments(output_dir="./examples", use_cpu=True, report_to="none") trainer = Trainer(model=model, args=training_args, eval_dataset=eval_dataset) result = trainer.evaluate() self.assertLess(result["eval_loss"], 0.2) @@ -2572,6 +2572,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon): output_dir="./examples", use_cpu=True, per_device_eval_batch_size=1, + report_to="none", ) trainer = Trainer( model=model, @@ -3107,6 +3108,8 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon): "--predict_with_generate", "--ddp_timeout", "60", + "--report_to", + "none", ] execute_subprocess_async(command) # successful return here == success - any errors would have caused an error or a timeout in the sub-call diff --git 
a/tests/trainer/test_trainer_seq2seq.py b/tests/trainer/test_trainer_seq2seq.py index 61d2163b9e..17593e2e73 100644 --- a/tests/trainer/test_trainer_seq2seq.py +++ b/tests/trainer/test_trainer_seq2seq.py @@ -161,7 +161,6 @@ class Seq2seqTrainerTester(TestCasePlus): tokenizer=tokenizer, data_collator=data_collator, compute_metrics=lambda x: {"samples": x[0].shape[0]}, - report_to="none", ) def prepare_data(examples): From daf281f44f654abd5e7ed07e37985cee2250c3af Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Tue, 21 May 2024 07:50:11 -0400 Subject: [PATCH 21/80] Enforce saving at end of training if saving option chosen (#30160) * Enforce saving at end of training * Fix test * Rework test * Fixup tests' * Update comment based on sourab feedback * Clean --- src/transformers/trainer_callback.py | 3 ++ src/transformers/training_args.py | 3 ++ tests/trainer/test_trainer.py | 51 ++++++++++++++++++++++++++ tests/trainer/test_trainer_callback.py | 2 +- 4 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/transformers/trainer_callback.py b/src/transformers/trainer_callback.py index a21c46fea9..d2570ed8ba 100644 --- a/src/transformers/trainer_callback.py +++ b/src/transformers/trainer_callback.py @@ -544,6 +544,9 @@ class DefaultFlowCallback(TrainerCallback): # End training if state.global_step >= state.max_steps: control.should_training_stop = True + # Save the model at the end if we have a save strategy + if args.save_strategy != IntervalStrategy.NO: + control.should_save = True return control diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 24bb3c7d02..60a81f7454 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -335,6 +335,9 @@ class TrainingArguments: - `"no"`: No save is done during training. - `"epoch"`: Save is done at the end of each epoch. - `"steps"`: Save is done every `save_steps`. + + If `"epoch"` or `"steps"` is chosen, saving will also be performed at the + very end of training, always. save_steps (`int` or `float`, *optional*, defaults to 500): Number of updates steps before two checkpoint saves if `save_strategy="steps"`. Should be an integer or a float in range `[0,1)`. If smaller than 1, will be interpreted as ratio of total training steps. 
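To make the new save-at-end rule above concrete, here is a minimal sketch of a configuration it affects; the step counts mirror the test added below, and `output_dir` is an arbitrary placeholder:

```python
from transformers import TrainingArguments

# With save_strategy="steps" and save_steps=5, a 9-step run previously produced only
# checkpoint-5 (9 is not a multiple of 5). After this change, a save is also forced
# at the final step, so both checkpoint-5 and checkpoint-9 are written.
args = TrainingArguments(
    output_dir="out",
    save_strategy="steps",
    save_steps=5,
    max_steps=9,
)
```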
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index fc7c3dc783..4d3fc57340 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -129,6 +129,7 @@ if is_torch_available(): if is_safetensors_available(): import safetensors.torch + # for version specific tests in TrainerIntegrationTest require_accelerate_version_min_0_28 = partial(require_accelerate, min_version="0.28") GRAD_ACCUM_KWARGS_VERSION_AVAILABLE = is_accelerate_available("0.28") @@ -2016,6 +2017,56 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon): tmpdir, 5, int(self.n_epochs * 64 / self.batch_size), False, safe_weights=save_safetensors ) + def test_load_best_model_with_save(self): + with tempfile.TemporaryDirectory() as tmpdir: + trainer = get_regression_trainer( + output_dir=tmpdir, + save_steps=5, + evaluation_strategy="steps", + eval_steps=5, + max_steps=9, + ) + trainer.train() + # Check that we have the last known step: + assert os.path.exists( + os.path.join(tmpdir, f"checkpoint-{trainer.state.max_steps}") + ), f"Could not find checkpoint-{trainer.state.max_steps}" + # And then check the last step + assert os.path.exists(os.path.join(tmpdir, "checkpoint-9")), "Could not find checkpoint-9" + + # Now test that using a limit works + # Should result in: + # - save at step 5 (but is deleted) + # - save at step 10 (loaded in at the end when `load_best_model=True`) + # - save at step 11 + with tempfile.TemporaryDirectory() as tmpdir: + trainer = get_regression_trainer( + output_dir=tmpdir, + save_steps=5, + evaluation_strategy="steps", + eval_steps=5, + load_best_model_at_end=True, + save_total_limit=2, + max_steps=11, + ) + trainer.train() + # Check that we have the last known step: + assert os.path.exists(os.path.join(tmpdir, "checkpoint-11")), "Could not find checkpoint-11" + # And then check the last multiple + assert os.path.exists(os.path.join(tmpdir, "checkpoint-10")), "Could not find checkpoint-10" + # Finally check that we don't have an old one + assert not os.path.exists(os.path.join(tmpdir, "checkpoint-5")), "Found checkpoint-5, limit not respected" + + # Finally check that the right model was loaded in, checkpoint-10 + # this goes by the last `eval` step check to do so, so it won't be + # the last model *saved* + model_state = trainer.model.state_dict() + final_model_weights = safetensors.torch.load_file( + os.path.join(tmpdir, "checkpoint-10", "model.safetensors") + ) + for k, v in model_state.items(): + assert torch.allclose(v, final_model_weights[k]), f"{k} is not the same" + @require_torch_multi_accelerator def test_run_seq2seq_double_train_wrap_once(self): # test that we don't wrap the model more than once diff --git a/tests/trainer/test_trainer_callback.py b/tests/trainer/test_trainer_callback.py index 8c0c9367d8..9eeb1d5e41 100644 --- a/tests/trainer/test_trainer_callback.py +++ b/tests/trainer/test_trainer_callback.py @@ -153,7 +153,7 @@ class TrainerCallbackTest(unittest.TestCase): expected_events.append("on_log") if trainer.args.eval_strategy == IntervalStrategy.STEPS and step % trainer.args.eval_steps == 0: expected_events += evaluation_events.copy() - if step % trainer.args.save_steps == 0: + if step % trainer.args.save_steps == 0 or step == trainer.state.max_steps: expected_events.append("on_save") expected_events.append("on_epoch_end") if trainer.args.eval_strategy == IntervalStrategy.EPOCH: From 3b09d3f05f95629736bf5337ab118fa3327cc8c7 Mon Sep 17 00:00:00 2001 From: Matthew Beckers <17108752+mattlbeck@users.noreply.github.com> 
Date: Tue, 21 May 2024 13:56:52 +0100 Subject: [PATCH 22/80] fix: center_crop occasionally outputs off-by-one dimension matrix (#30934) If required padding for a crop larger than input image is odd-numbered, the padding would be rounded down instead of rounded up, causing the output dimension to be one smaller than it should be. --- src/transformers/image_transforms.py | 5 +++-- tests/test_image_transforms.py | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/transformers/image_transforms.py b/src/transformers/image_transforms.py index 016fae4405..65d6413db7 100644 --- a/src/transformers/image_transforms.py +++ b/src/transformers/image_transforms.py @@ -14,6 +14,7 @@ # limitations under the License. import warnings +from math import ceil from typing import Iterable, List, Optional, Tuple, Union import numpy as np @@ -483,9 +484,9 @@ def center_crop( new_image = np.zeros_like(image, shape=new_shape) # If the image is too small, pad it with zeros - top_pad = (new_height - orig_height) // 2 + top_pad = ceil((new_height - orig_height) / 2) bottom_pad = top_pad + orig_height - left_pad = (new_width - orig_width) // 2 + left_pad = ceil((new_width - orig_width) / 2) right_pad = left_pad + orig_width new_image[..., top_pad:bottom_pad, left_pad:right_pad] = image diff --git a/tests/test_image_transforms.py b/tests/test_image_transforms.py index ae86f84def..25775d787e 100644 --- a/tests/test_image_transforms.py +++ b/tests/test_image_transforms.py @@ -369,6 +369,10 @@ class ImageTransformsTester(unittest.TestCase): self.assertEqual(cropped_image.shape, (300, 260, 3)) self.assertTrue(np.allclose(cropped_image, expected_image)) + # Test that odd numbered padding requirement still leads to correct output dimensions + cropped_image = center_crop(image, (300, 259), data_format="channels_last") + self.assertEqual(cropped_image.shape, (300, 259, 3)) + # Test image with 4 channels is cropped correctly image = np.random.randint(0, 256, (224, 224, 4)) expected_image = image[52:172, 82:142, :] From 64e0573a8133db833521db9f23d05e36fc06c5f3 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Tue, 21 May 2024 15:15:19 +0200 Subject: [PATCH 23/80] [Benchmark] Reuse `optimum-benchmark` (#30615) * benchmark * update --------- Co-authored-by: ydshieh --- Makefile | 7 +- benchmark/__init__.py | 0 benchmark/benchmark.py | 310 ++++++++++++++++++ benchmark/config/generation.yaml | 57 ++++ benchmark/optimum_benchmark_wrapper.py | 16 + setup.py | 3 + src/transformers/dependency_versions_table.py | 1 + 7 files changed, 393 insertions(+), 1 deletion(-) create mode 100644 benchmark/__init__.py create mode 100644 benchmark/benchmark.py create mode 100644 benchmark/config/generation.yaml create mode 100644 benchmark/optimum_benchmark_wrapper.py diff --git a/Makefile b/Makefile index ebc66d922c..6c1541a113 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples +.PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples benchmark # make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!) 
export PYTHONPATH = src @@ -96,6 +96,11 @@ test: test-examples: python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/ +# Run benchmark + +benchmark: + python3 benchmark/benchmark.py --config-dir benchmark/config --config-name generation --commit=diff backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun + # Run tests for SageMaker DLC release test-sagemaker: # install sagemaker dependencies in advance with pip install .[sagemaker] diff --git a/benchmark/__init__.py b/benchmark/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py new file mode 100644 index 0000000000..9e38c1f70a --- /dev/null +++ b/benchmark/benchmark.py @@ -0,0 +1,310 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Run benchmark using the `optimum-benchmark` library with some customization in `transformers`. + +Assume we are under `transformers` root directory: (make sure the commits are valid commits) +```bash +python benchmark/benchmark.py --config-dir benchmark/config --config-name generation --commit=9b9c7f03da625b13643e99205c691fe046461724 --metrics=decode.latency.mean,per_token.latency.mean,per_token.throughput.value backend.model=google/gemma-2b benchmark.input_shapes.sequence_length=5,7 benchmark.input_shapes.batch_size=1,2 --multirun +``` +""" + +import argparse +import glob +import json +import os.path +import re +import tempfile +from contextlib import contextmanager +from pathlib import Path + +from git import Repo + +from optimum_benchmark import Benchmark +from optimum_benchmark_wrapper import main + + +PATH_TO_REPO = Path(__file__).parent.parent.resolve() + + +@contextmanager +def checkout_commit(repo: Repo, commit_id: str): + """ + Context manager that checks out a given commit when entered, but gets back to the reference it was at on exit. + Args: + repo (`git.Repo`): A git repository (for instance the Transformers repo). + commit_id (`str`): The commit reference to checkout inside the context manager. + """ + current_head = repo.head.commit if repo.head.is_detached else repo.head.ref + + try: + repo.git.checkout(commit_id) + yield + + finally: + repo.git.checkout(current_head) + + +def summarize(run_dir, metrics, expand_metrics=False): + """Produce a summary for each optimum-benchmark launched job's output directory found in `run_dir`. 
+ + Each summary's format is as follows (for `expand_metrics=False`): + ``` + { + "model": "google/gemma-2b", + "commit": "3cd6ed22e4d49219f300f5055e71e3929aba20d7", + "config": "benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5", + "metrics": { + "decode.latency.mean": 1.624666809082031, + "per_token.latency.mean": 0.012843788806628804, + "per_token.throughput.value": 77.85864553330948 + } + } + ``` + """ + reports = glob.glob(os.path.join(run_dir, "**/benchmark_report.json"), recursive=True) + report_dirs = [str(Path(report).parent) for report in reports] + + summaries = [] + for report_dir in report_dirs: + commit = re.search(r"/commit=([^/]+)", report_dir).groups()[0] + + if not os.path.isfile(os.path.join(report_dir, "benchmark.json")): + continue + benchmark = Benchmark.from_json(os.path.join(report_dir, "benchmark.json")) + report = benchmark.report + + model = benchmark.config.backend["model"] + + # Ths looks like `benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5`. + # (we rely on the usage of hydra's `${hydra.job.override_dirname}`.) + benchmark_name = re.sub(f"backend.model={model},*", "", report_dir) + benchmark_name = str(Path(benchmark_name).parts[-1]) + if benchmark_name.startswith("commit="): + benchmark_name = benchmark.config.name + + metrics_values = {} + # post-processing of report: show a few selected/important metric + for metric in metrics: + keys = metric.split(".") + value = report + current = metrics_values + for key in keys: + # Avoid KeyError when a user's specified metric has typo. + # TODO: Give warnings. + if key not in value: + continue + value = value[key] + + if expand_metrics: + if isinstance(value, dict): + if key not in current: + current[key] = {} + current = current[key] + else: + current[key] = value + + if not expand_metrics: + metrics_values[metric] = value + + # show some config information + print(f"model: {model}") + print(f"commit: {commit}") + print(f"config: {benchmark_name}") + if len(metrics_values) > 0: + print("metrics:") + if expand_metrics: + print(metrics_values) + else: + for metric, value in metrics_values.items(): + print(f" - {metric}: {value}") + print("-" * 80) + + summary = { + "model": model, + "commit": commit, + "config": benchmark_name, + "metrics": metrics_values, + } + summaries.append(summary) + + with open(os.path.join(report_dir, "summary.json"), "w") as fp: + json.dump(summary, fp, indent=4) + + # TODO: upload to Hub + return summaries + + +def combine_summaries(summaries): + """Combine a list of summary obtained from the function `summarize`. 
+ + The combined summary's format is as follows: + ``` + "google/gemma-2b": { + "benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5": { + "3cd6ed22e4d49219f300f5055e71e3929aba20d7": { + "metrics": {"decode.latency.mean": 1.624666809082031} + }, + "c97ee28b117c0abe8e08891f402065e4df6d72aa": { + "metrics": {"decode.latency.mean": 1.6278163452148438} + } + }, + "benchmark.input_shapes.batch_size=2,benchmark.input_shapes.sequence_length=5": { + "3cd6ed22e4d49219f300f5055e71e3929aba20d7": { + "metrics": {"decode.latency.mean": 1.6947791748046876} + }, + "c97ee28b117c0abe8e08891f402065e4df6d72aa": { + "metrics": { + "decode.latency.mean": 1.6980519409179688} + } + } + } + ``` + """ + combined = {} + for summary in summaries: + model = summary["model"] + config = summary["config"] + commit = summary["commit"] + + if model not in combined: + combined[model] = {} + + if config not in combined[model]: + combined[model][config] = {} + + if commit not in combined[model][config]: + combined[model][config][commit] = {"metrics": summary["metrics"]} + + with open(os.path.join(exp_run_dir, "summary.json"), "w") as fp: + json.dump(combined, fp, indent=4) + + # TODO: upload to Hub + print(json.dumps(combined, indent=4)) + + return combined + + +if __name__ == "__main__": + + def list_str(values): + return values.split(",") + + parser = argparse.ArgumentParser() + + parser.add_argument("--config-dir", type=str, required=True, help="The path to the config directory.") + parser.add_argument("--config-name", type=str, required=True, help="The config name.") + + # arguments specific to this wrapper for our own customization + parser.add_argument("--ensure_empty", type=bool, default=True, help="If to create a temporary directory.") + parser.add_argument( + "--commit", + type=list_str, + default="", + help="Comma-separated list of branch names and/or commit sha values on which the benchmark will run. If `diff` is specified, it will run on both the current head and the `main` branch.", + ) + parser.add_argument("--metrics", type=str, help="The metrics to be included in the summary.") + args, optimum_benchmark_args = parser.parse_known_args() + + repo = Repo(PATH_TO_REPO) + + metrics = [ + "prefill.latency.mean", + "prefill.throughput.value", + "decode.latency.mean", + "decode.throughput.value", + "per_token.latency.mean", + "per_token.throughput.value", + ] + if args.metrics is not None: + metrics = args.metrics.split(",") + + # Get `backend.model` in a hacky way: We want to control the experiment flow manually. 
+ models = [""] + for idx, arg in enumerate(optimum_benchmark_args): + if arg.startswith("backend.model="): + models = arg[len("backend.model=") :] + models = models.split(",") + break + optimum_benchmark_args = [arg for arg in optimum_benchmark_args if not arg.startswith("backend.model=")] + + # Get the commit(s) + current_head = str(repo.head.commit) if repo.head.is_detached else str(repo.head.ref) + commits = [x for x in args.commit if x != ""] + if len(commits) == 0: + commits = [current_head] + elif len(commits) == 1 and commits[0] == "diff": + # compare to `main` + commits = ["main", current_head] + + # Get the specified run directory + run_dir_arg_idx, run_dir = -1, None + sweep_dir_arg_idx, sweep_dir = -1, None + for idx, arg in enumerate(optimum_benchmark_args): + if arg.startswith("hydra.run.dir="): + run_dir = arg[len("hydra.run.dir=") :] + run_dir_arg_idx = idx + elif arg.startswith("hydra.sweep.dir="): + sweep_dir = arg[len("hydra.sweep.dir=") :] + sweep_dir_arg_idx = idx + exp_run_dir, arg_dix, arg_name = ( + (sweep_dir, sweep_dir_arg_idx, "hydra.sweep.dir") + if "--multirun" in optimum_benchmark_args + else (run_dir, run_dir_arg_idx, "hydra.run.dir") + ) + + # TODO: not hardcoded + if exp_run_dir is None and args.ensure_empty: + exp_run_dir = "_benchmark" + + if args.ensure_empty: + os.makedirs(exp_run_dir, exist_ok=True) + exp_run_dir = tempfile.mkdtemp(dir=exp_run_dir) + + run_summaries = [] + for commit in commits: + with checkout_commit(repo, commit): + commit = str(repo.head.commit) + + commit_run_dir = exp_run_dir + if exp_run_dir is not None: + commit_run_dir = os.path.join(exp_run_dir, rf"commit\={commit}") + + print(f"Run benchmark on commit: {commit}") + + for model in models: + model_arg = [f"backend.model={model}"] if model != "" else [] + dir_args = [] + if commit_run_dir is not None: + if arg_dix > -1: + optimum_benchmark_args[arg_dix] = f"{arg_name}={commit_run_dir}" + else: + dir_args = [ + f"hydra.sweep.dir={commit_run_dir}", + f"hydra.run.dir={commit_run_dir}/" + "${hydra.job.override_dirname}", + ] + main(args.config_dir, args.config_name, model_arg + dir_args + optimum_benchmark_args) + + if commit_run_dir is not None: + # Need to remove the `\` character + summaries = summarize(commit_run_dir.replace("\\", ""), metrics) + run_summaries.extend(summaries) + + # aggregate the information across the commits + if exp_run_dir is not None: + with open(os.path.join(exp_run_dir, "summaries.json"), "w") as fp: + json.dump(run_summaries, fp, indent=4) + + combined_summary = combine_summaries(run_summaries) diff --git a/benchmark/config/generation.yaml b/benchmark/config/generation.yaml new file mode 100644 index 0000000000..44a3f9ea49 --- /dev/null +++ b/benchmark/config/generation.yaml @@ -0,0 +1,57 @@ +defaults: + - benchmark # inheriting benchmark schema + - scenario: inference + - launcher: process + - backend: pytorch + - _self_ # for hydra 1.1 compatibility + +name: pytorch_generate + +launcher: + start_method: spawn + device_isolation: true + device_isolation_action: warn + +backend: + device: cuda + device_ids: 0 + no_weights: true + model: meta-llama/Llama-2-7b-hf + cache_implementation: static + torch_compile: true + torch_dtype: float16 + torch_compile_config: + backend: inductor + mode: reduce-overhead + fullgraph: true + +scenario: + input_shapes: + batch_size: 1 + sequence_length: 7 + generate_kwargs: + max_new_tokens: 128 + min_new_tokens: 128 + do_sample: false + memory: true + latency: true + iterations: 2 + duration: 0 + + +# hydra/cli specific 
settings +hydra: + run: + # where to store run results + dir: runs/${name} + job: + # change working directory to the run directory + chdir: true + env_set: + # set environment variable OVERRIDE_BENCHMARKS to 1 + # to not skip benchmarks that have been run before + OVERRIDE_BENCHMARKS: 1 + LOG_LEVEL: WARN + sweep: + dir: multirun + subdir: ${hydra.job.override_dirname} \ No newline at end of file diff --git a/benchmark/optimum_benchmark_wrapper.py b/benchmark/optimum_benchmark_wrapper.py new file mode 100644 index 0000000000..c43e9a73e3 --- /dev/null +++ b/benchmark/optimum_benchmark_wrapper.py @@ -0,0 +1,16 @@ +import argparse +import subprocess + + +def main(config_dir, config_name, args): + subprocess.run(["optimum-benchmark", "--config-dir", f"{config_dir}", "--config-name", f"{config_name}"] + ["hydra/job_logging=disabled", "hydra/hydra_logging=disabled"] + args) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument("--config-dir", type=str, required=True, help="The path to the config directory.") + parser.add_argument("--config-name", type=str, required=True, help="The config name.") + args, unknown = parser.parse_known_args() + + main(args.config_dir, args.config_name, unknown) diff --git a/setup.py b/setup.py index dfd0b61cbe..efdb5342d0 100644 --- a/setup.py +++ b/setup.py @@ -136,6 +136,7 @@ _deps = [ "onnxruntime-tools>=1.4.2", "onnxruntime>=1.4.0", "opencv-python", + "optimum-benchmark>=0.2.0", "optuna", "optax>=0.0.8,<=0.1.4", "packaging>=20.0", @@ -410,6 +411,8 @@ extras["agents"] = deps_list( "diffusers", "accelerate", "datasets", "torch", "sentencepiece", "opencv-python", "Pillow" ) +extras["benchmark"] = deps_list("optimum-benchmark") + # when modifying the following list, make sure to update src/transformers/dependency_versions_check.py install_requires = [ deps["filelock"], # filesystem locks, e.g., to prevent parallel downloads diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index e4c9fbf0d9..ceca99d450 100644 --- a/src/transformers/dependency_versions_table.py +++ b/src/transformers/dependency_versions_table.py @@ -42,6 +42,7 @@ deps = { "onnxruntime-tools": "onnxruntime-tools>=1.4.2", "onnxruntime": "onnxruntime>=1.4.0", "opencv-python": "opencv-python", + "optimum-benchmark": "optimum-benchmark>=0.2.0", "optuna": "optuna", "optax": "optax>=0.0.8,<=0.1.4", "packaging": "packaging>=20.0", From eae2b6b89e72dbee531e5b96acc94b98161aa8a4 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Tue, 21 May 2024 15:54:41 +0200 Subject: [PATCH 24/80] TST / Workflows: Get slack notifications for docker image build (#30891) * Get slack notifications for docker image build * Apply suggestions from code review * Apply suggestions from code review --- .github/workflows/build-docker-images.yml | 83 ++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index 7c9e86d091..f113579691 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -57,6 +57,15 @@ jobs: push: true tags: huggingface/transformers-all-latest-gpu-push-ci + - name: Post to Slack + if: always() + uses: huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} + title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build + status: ${{ 
job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} + latest-torch-deepspeed-docker: name: "Latest PyTorch + DeepSpeed" runs-on: [intel-cpu, 8-cpu, ci] @@ -93,6 +102,15 @@ jobs: push: true tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }} + - name: Post to Slack + if: always() + uses: huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER}} + title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build + status: ${{ job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} + # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`) latest-torch-deepspeed-docker-for-push-ci-daily-build: name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)" @@ -134,6 +152,15 @@ jobs: push: true tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci + - name: Post to Slack + if: always() + uses: huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} + title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build + status: ${{ job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} + doc-builder: name: "Doc builder" # Push CI doesn't need this image @@ -160,6 +187,15 @@ jobs: push: true tags: huggingface/transformers-doc-builder + - name: Post to Slack + if: always() + uses: huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} + title: 🤗 Results of the huggingface/transformers-doc-builder docker build + status: ${{ job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} + latest-pytorch: name: "Latest PyTorch [dev]" # Push CI doesn't need this image @@ -198,6 +234,15 @@ jobs: push: true tags: huggingface/transformers-pytorch-gpu + - name: Post to Slack + if: always() + uses: huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} + title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build + status: ${{ job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} + latest-pytorch-amd: name: "Latest PyTorch (AMD) [dev]" runs-on: [intel-cpu, 8-cpu, ci] @@ -237,6 +282,15 @@ jobs: push: true tags: huggingface/transformers-pytorch-amd-gpu-push-ci + - name: Post to Slack + if: always() + uses: huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} + title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build + status: ${{ job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} + latest-tensorflow: name: "Latest TensorFlow [dev]" # Push CI doesn't need this image @@ -265,6 +319,15 @@ jobs: push: true tags: huggingface/transformers-tensorflow-gpu + - name: Post to Slack + if: always() + uses: huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} + title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build + status: ${{ job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} + latest-pytorch-deepspeed-amd: name: "PyTorch + DeepSpeed (AMD) [dev]" runs-on: [intel-cpu, 8-cpu, ci] @@ -304,6 +367,15 @@ jobs: push: true tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci + - name: Post to Slack + if: always() + uses: 
huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} + title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build + status: ${{ job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} + latest-quantization-torch-docker: name: "Latest Pytorch + Quantization [dev]" # Push CI doesn't need this image @@ -330,4 +402,13 @@ jobs: build-args: | REF=main push: true - tags: huggingface/transformers-quantization-latest-gpu${{ inputs.image_postfix }} \ No newline at end of file + tags: huggingface/transformers-quantization-latest-gpu${{ inputs.image_postfix }} + + - name: Post to Slack + if: always() + uses: huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} + title: 🤗 Results of the transformers-quantization-latest-gpu build + status: ${{ job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} From d24097e0229485287ff4959258c552168bd898c6 Mon Sep 17 00:00:00 2001 From: amyeroberts <22614925+amyeroberts@users.noreply.github.com> Date: Tue, 21 May 2024 15:40:19 +0100 Subject: [PATCH 25/80] Fix swin embeddings interpolation (#30936) --- .../models/donut/modeling_donut_swin.py | 19 +++---------------- .../maskformer/modeling_maskformer_swin.py | 19 +++---------------- src/transformers/models/swin/modeling_swin.py | 17 ++--------------- .../models/swinv2/modeling_swinv2.py | 17 ++--------------- 4 files changed, 10 insertions(+), 62 deletions(-) diff --git a/src/transformers/models/donut/modeling_donut_swin.py b/src/transformers/models/donut/modeling_donut_swin.py index c1b27cd180..5b8a0e27b0 100644 --- a/src/transformers/models/donut/modeling_donut_swin.py +++ b/src/transformers/models/donut/modeling_donut_swin.py @@ -205,9 +205,7 @@ class DonutSwinEmbeddings(nn.Module): interpolate_pos_encoding: bool = False, ) -> Tuple[torch.Tensor]: _, num_channels, height, width = pixel_values.shape - embeddings, output_dimensions = self.patch_embeddings( - pixel_values, interpolate_pos_encoding=interpolate_pos_encoding - ) + embeddings, output_dimensions = self.patch_embeddings(pixel_values) embeddings = self.norm(embeddings) batch_size, seq_len, _ = embeddings.size() @@ -228,7 +226,7 @@ class DonutSwinEmbeddings(nn.Module): return embeddings, output_dimensions -# Copied from transformers.models.swin.modeling_swin.SwinPatchEmbeddings +# Copied from transformers.models.swin.modeling_swin.SwinPatchEmbeddings with Swin->DonutSwin class DonutSwinPatchEmbeddings(nn.Module): """ This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial @@ -260,21 +258,10 @@ class DonutSwinPatchEmbeddings(nn.Module): pixel_values = nn.functional.pad(pixel_values, pad_values) return pixel_values - def forward( - self, pixel_values: Optional[torch.FloatTensor], interpolate_pos_encoding: bool = False - ) -> Tuple[torch.Tensor, Tuple[int]]: + def forward(self, pixel_values: Optional[torch.FloatTensor]) -> Tuple[torch.Tensor, Tuple[int]]: _, num_channels, height, width = pixel_values.shape - if num_channels != self.num_channels: - raise ValueError( - "Make sure that the channel dimension of the pixel values match with the one set in the configuration." 
- ) # pad the input to be divisible by self.patch_size, if needed pixel_values = self.maybe_pad(pixel_values, height, width) - if not interpolate_pos_encoding and (height != self.image_size[0] or width != self.image_size[1]): - raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" - f" ({self.image_size[0]}*{self.image_size[1]})." - ) embeddings = self.projection(pixel_values) _, _, height, width = embeddings.shape output_dimensions = (height, width) diff --git a/src/transformers/models/maskformer/modeling_maskformer_swin.py b/src/transformers/models/maskformer/modeling_maskformer_swin.py index fc9c642adc..ef607ec811 100644 --- a/src/transformers/models/maskformer/modeling_maskformer_swin.py +++ b/src/transformers/models/maskformer/modeling_maskformer_swin.py @@ -197,9 +197,7 @@ class MaskFormerSwinEmbeddings(nn.Module): def forward(self, pixel_values, interpolate_pos_encoding): _, num_channels, height, width = pixel_values.shape - embeddings, output_dimensions = self.patch_embeddings( - pixel_values, interpolate_pos_encoding=interpolate_pos_encoding - ) + embeddings, output_dimensions = self.patch_embeddings(pixel_values) embeddings = self.norm(embeddings) if self.position_embeddings is not None: @@ -213,7 +211,7 @@ class MaskFormerSwinEmbeddings(nn.Module): return embeddings, output_dimensions -# Copied from transformers.models.swin.modeling_swin.SwinPatchEmbeddings +# Copied from transformers.models.swin.modeling_swin.SwinPatchEmbeddings with Swin->MaskFormerSwin class MaskFormerSwinPatchEmbeddings(nn.Module): """ This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial @@ -245,21 +243,10 @@ class MaskFormerSwinPatchEmbeddings(nn.Module): pixel_values = nn.functional.pad(pixel_values, pad_values) return pixel_values - def forward( - self, pixel_values: Optional[torch.FloatTensor], interpolate_pos_encoding: bool = False - ) -> Tuple[torch.Tensor, Tuple[int]]: + def forward(self, pixel_values: Optional[torch.FloatTensor]) -> Tuple[torch.Tensor, Tuple[int]]: _, num_channels, height, width = pixel_values.shape - if num_channels != self.num_channels: - raise ValueError( - "Make sure that the channel dimension of the pixel values match with the one set in the configuration." - ) # pad the input to be divisible by self.patch_size, if needed pixel_values = self.maybe_pad(pixel_values, height, width) - if not interpolate_pos_encoding and (height != self.image_size[0] or width != self.image_size[1]): - raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" - f" ({self.image_size[0]}*{self.image_size[1]})." 
- ) embeddings = self.projection(pixel_values) _, _, height, width = embeddings.shape output_dimensions = (height, width) diff --git a/src/transformers/models/swin/modeling_swin.py b/src/transformers/models/swin/modeling_swin.py index 2a6363c8e6..9f64f8009a 100644 --- a/src/transformers/models/swin/modeling_swin.py +++ b/src/transformers/models/swin/modeling_swin.py @@ -291,9 +291,7 @@ class SwinEmbeddings(nn.Module): interpolate_pos_encoding: bool = False, ) -> Tuple[torch.Tensor]: _, num_channels, height, width = pixel_values.shape - embeddings, output_dimensions = self.patch_embeddings( - pixel_values, interpolate_pos_encoding=interpolate_pos_encoding - ) + embeddings, output_dimensions = self.patch_embeddings(pixel_values) embeddings = self.norm(embeddings) batch_size, seq_len, _ = embeddings.size() @@ -345,21 +343,10 @@ class SwinPatchEmbeddings(nn.Module): pixel_values = nn.functional.pad(pixel_values, pad_values) return pixel_values - def forward( - self, pixel_values: Optional[torch.FloatTensor], interpolate_pos_encoding: bool = False - ) -> Tuple[torch.Tensor, Tuple[int]]: + def forward(self, pixel_values: Optional[torch.FloatTensor]) -> Tuple[torch.Tensor, Tuple[int]]: _, num_channels, height, width = pixel_values.shape - if num_channels != self.num_channels: - raise ValueError( - "Make sure that the channel dimension of the pixel values match with the one set in the configuration." - ) # pad the input to be divisible by self.patch_size, if needed pixel_values = self.maybe_pad(pixel_values, height, width) - if not interpolate_pos_encoding and (height != self.image_size[0] or width != self.image_size[1]): - raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" - f" ({self.image_size[0]}*{self.image_size[1]})." - ) embeddings = self.projection(pixel_values) _, _, height, width = embeddings.shape output_dimensions = (height, width) diff --git a/src/transformers/models/swinv2/modeling_swinv2.py b/src/transformers/models/swinv2/modeling_swinv2.py index ac8ec197e5..48a4c65722 100644 --- a/src/transformers/models/swinv2/modeling_swinv2.py +++ b/src/transformers/models/swinv2/modeling_swinv2.py @@ -334,9 +334,7 @@ class Swinv2Embeddings(nn.Module): interpolate_pos_encoding: bool = False, ) -> Tuple[torch.Tensor]: _, num_channels, height, width = pixel_values.shape - embeddings, output_dimensions = self.patch_embeddings( - pixel_values, interpolate_pos_encoding=interpolate_pos_encoding - ) + embeddings, output_dimensions = self.patch_embeddings(pixel_values) embeddings = self.norm(embeddings) batch_size, seq_len, _ = embeddings.size() @@ -389,21 +387,10 @@ class Swinv2PatchEmbeddings(nn.Module): pixel_values = nn.functional.pad(pixel_values, pad_values) return pixel_values - def forward( - self, pixel_values: Optional[torch.FloatTensor], interpolate_pos_encoding: bool = False - ) -> Tuple[torch.Tensor, Tuple[int]]: + def forward(self, pixel_values: Optional[torch.FloatTensor]) -> Tuple[torch.Tensor, Tuple[int]]: _, num_channels, height, width = pixel_values.shape - if num_channels != self.num_channels: - raise ValueError( - "Make sure that the channel dimension of the pixel values match with the one set in the configuration." 
- ) # pad the input to be divisible by self.patch_size, if needed pixel_values = self.maybe_pad(pixel_values, height, width) - if not interpolate_pos_encoding and (height != self.image_size[0] or width != self.image_size[1]): - raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" - f" ({self.image_size[0]}*{self.image_size[1]})." - ) embeddings = self.projection(pixel_values) _, _, height, width = embeddings.shape output_dimensions = (height, width) From 5bf9caa06df47acda751ff5bcef95b937b86c71f Mon Sep 17 00:00:00 2001 From: Lu Teng Date: Wed, 22 May 2024 01:14:11 +0800 Subject: [PATCH 26/80] Fix inhomogeneous shape error in example (#30434) Fix inhomogeneous shape error in example. --- examples/flax/question-answering/run_qa.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/flax/question-answering/run_qa.py b/examples/flax/question-answering/run_qa.py index 7c705090f1..16a744ddc3 100644 --- a/examples/flax/question-answering/run_qa.py +++ b/examples/flax/question-answering/run_qa.py @@ -426,7 +426,8 @@ def eval_data_collator(dataset: Dataset, batch_size: int): for idx in batch_idx: batch = dataset[idx] - batch = {k: np.array(v) for k, v in batch.items()} + # Ignore `offset_mapping` to avoid numpy/JAX array conversion issue. + batch = {k: np.array(v) for k, v in batch.items() if k != "offset_mapping"} yield batch @@ -1000,7 +1001,6 @@ def main(): position=2, ): _ = batch.pop("example_id") - _ = batch.pop("offset_mapping") predictions = pad_shard_unpad(p_eval_step)( state, batch, min_device_batch=per_device_eval_batch_size ) @@ -1055,7 +1055,6 @@ def main(): eval_loader, total=math.ceil(len(eval_dataset) / eval_batch_size), desc="Evaluating ...", position=2 ): _ = batch.pop("example_id") - _ = batch.pop("offset_mapping") predictions = pad_shard_unpad(p_eval_step)(state, batch, min_device_batch=per_device_eval_batch_size) start_logits = np.array(predictions[0]) end_logits = np.array(predictions[1]) From 60bb571e993b7d73257fb64044726b569fef9403 Mon Sep 17 00:00:00 2001 From: NielsRogge <48327001+NielsRogge@users.noreply.github.com> Date: Tue, 21 May 2024 19:38:02 +0200 Subject: [PATCH 27/80] =?UTF-8?q?=F0=9F=9A=A8=20[Idefics2]=20Update=20igno?= =?UTF-8?q?re=20index=20(#30898)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update ignore index * Update docs * Update docs --- docs/source/en/model_doc/idefics2.md | 52 +++++++++++++++++++ .../models/idefics2/modeling_idefics2.py | 2 +- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/docs/source/en/model_doc/idefics2.md b/docs/source/en/model_doc/idefics2.md index 31a7a1cdeb..5ad56b7b5c 100644 --- a/docs/source/en/model_doc/idefics2.md +++ b/docs/source/en/model_doc/idefics2.md @@ -87,6 +87,58 @@ generated_text = processor.batch_decode(generated_text, skip_special_tokens=True print("Generated text:", generated_text) ``` +- During training, it's important to determine which tokens the model should not learn. For Idefics2, this typically comes down to the image and padding tokens. 
This means that one can create the labels as follows: + +```python +import requests +from PIL import Image +from transformers import Idefics2Processor, Idefics2ForConditionalGeneration +import torch + +url_1 = "http://images.cocodataset.org/val2017/000000039769.jpg" +url_2 = "http://images.cocodataset.org/val2017/000000219578.jpg" + +image_1 = Image.open(requests.get(url_1, stream=True).raw) +image_2 = Image.open(requests.get(url_2, stream=True).raw) +images = [image_1, image_2] + +messages = [{ + "role": "user", + "content": [ + {"type": "text", "text": "What’s the difference between these two images?"}, + {"type": "image"}, + {"type": "image"}, + ], +}, +{ + "role": "assistant", + "content": [ + {"type": "text", "text": "The difference is that one image is about dogs and the other one about cats."}, + ], +}] + +device = "cuda" if torch.cuda.is_available() else "cpu" + +processor = Idefics2Processor.from_pretrained("HuggingFaceM4/idefics2-8b") +model = Idefics2ForConditionalGeneration.from_pretrained("HuggingFaceM4/idefics2-8b") +model.to(device) + +text = processor.apply_chat_template(messages, add_generation_prompt=False) +inputs = processor(images=images, text=text, return_tensors="pt").to(device) + +labels = inputs.input_ids.clone() +labels[labels == processor.tokenizer.pad_token_id] = -100 +labels[labels == model.config.image_token_id] = -100 + +inputs["labels"] = labels + +outputs = model(**inputs) +loss = outputs.loss +loss.backward() +``` + +Do note that when training Idefics2 on multi-turn conversations between a user and an assistant, one typically also sets all the tokens corresponding to the user messages to -100. + ## Model optimizations: Flash Attention The code snippets above showcase inference without any optimization tricks. However, one can drastically speed up the model by leveraging [Flash Attention](../perf_train_gpu_one.md#flash-attention-2), which is a faster implementation of the attention mechanism used inside the model. 
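The `-100` labels above rely on the fact that PyTorch's `CrossEntropyLoss` ignores that index by default, which is consistent with dropping the explicit `ignore_index` override in the modeling change below. A small self-contained sketch, with random logits and arbitrary label values purely for illustration:

```python
import torch
from torch.nn import CrossEntropyLoss

logits = torch.randn(6, 32)                       # (num_tokens, vocab_size)
labels = torch.tensor([3, 7, -100, -100, 12, 5])  # -100 marks positions to skip

# CrossEntropyLoss defaults to ignore_index=-100, so only the four labelled
# positions contribute to the loss.
loss = CrossEntropyLoss()(logits, labels)
```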
diff --git a/src/transformers/models/idefics2/modeling_idefics2.py b/src/transformers/models/idefics2/modeling_idefics2.py index 17ed18f6b9..6acabad063 100644 --- a/src/transformers/models/idefics2/modeling_idefics2.py +++ b/src/transformers/models/idefics2/modeling_idefics2.py @@ -1857,7 +1857,7 @@ class Idefics2ForConditionalGeneration(Idefics2PreTrainedModel): shift_logits = logits[..., :-1, :].contiguous() shift_labels = labels[..., 1:].contiguous() # Flatten the tokens - loss_fct = CrossEntropyLoss(ignore_index=self.image_token_id) + loss_fct = CrossEntropyLoss() loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1)) if not return_dict: From 673440d073d5f534a6d6bedeeca94869afd8d0a7 Mon Sep 17 00:00:00 2001 From: Arthur <48595927+ArthurZucker@users.noreply.github.com> Date: Wed, 22 May 2024 06:40:15 +0200 Subject: [PATCH 28/80] update ruff version (#30932) * update ruff version * fix research projects * Empty * Fix errors --------- Co-authored-by: Lysandre --- Makefile | 2 +- .../flax/language-modeling/run_mlm_flax.py | 1 + .../flax/language-modeling/run_t5_mlm_flax.py | 1 + examples/flax/question-answering/utils_qa.py | 1 + .../flax/text-classification/run_flax_glue.py | 3 +- .../flax/token-classification/run_flax_ner.py | 3 +- examples/legacy/benchmarking/plot_csv_file.py | 12 +- examples/legacy/benchmarking/run_benchmark.py | 2 +- .../multiple_choice/run_multiple_choice.py | 3 +- .../multiple_choice/utils_multiple_choice.py | 3 +- .../legacy/question-answering/run_squad.py | 3 +- .../question-answering/run_squad_trainer.py | 3 +- examples/legacy/run_language_modeling.py | 1 - examples/legacy/run_openai_gpt.py | 1 + examples/legacy/run_swag.py | 3 +- examples/legacy/run_transfo_xl.py | 9 +- examples/legacy/seq2seq/xla_spawn.py | 1 - .../legacy/token-classification/run_ner.py | 3 +- .../legacy/token-classification/utils_ner.py | 3 +- .../run_image_classification_no_trainer.py | 3 +- .../run_object_detection_no_trainer.py | 2 +- .../pytorch/question-answering/trainer_qa.py | 1 + .../question-answering/trainer_seq2seq_qa.py | 1 + .../pytorch/question-answering/utils_qa.py | 1 + .../run_semantic_segmentation_no_trainer.py | 2 +- .../run_wav2vec2_pretraining_no_trainer.py | 2 +- .../run_speech_recognition_ctc.py | 2 +- .../run_speech_recognition_ctc_adapter.py | 2 +- .../text-classification/run_classification.py | 2 +- .../pytorch/text-classification/run_glue.py | 2 +- .../run_glue_no_trainer.py | 3 +- .../pytorch/text-classification/run_xnli.py | 4 +- .../pytorch/text-generation/run_generation.py | 4 +- .../run_generation_contrastive_search.py | 3 +- examples/pytorch/xla_spawn.py | 1 - .../research_projects/adversarial/run_hans.py | 2 +- .../pabee/modeling_pabee_albert.py | 2 +- .../pabee/modeling_pabee_bert.py | 3 +- .../run_glue_with_pabee.py | 3 +- .../bertabs/configuration_bertabs.py | 3 +- ...ert_bertabs_original_pytorch_checkpoint.py | 2 +- .../bertology/run_bertology.py | 13 +- .../bertology/run_prune_gpt.py | 2 +- .../distillation/distiller.py | 5 +- .../distillation/grouped_batch_sampler.py | 4 +- .../distillation/lm_seqs_dataset.py | 5 +- .../distillation/run_squad_w_distillation.py | 2 +- .../distillation/scripts/binarized_data.py | 1 + .../distillation/scripts/extract.py | 1 + .../scripts/extract_distilbert.py | 1 + .../distillation/scripts/token_counts.py | 1 + .../research_projects/distillation/train.py | 1 + .../research_projects/distillation/utils.py | 5 +- .../dataset-streaming/run_mlm_flax_stream.py | 1 + .../lxmert/modeling_frcnn.py | 27 
++-- .../lxmert/processing_image.py | 27 ++-- examples/research_projects/lxmert/utils.py | 26 ++-- .../lxmert/visualizing_image.py | 27 ++-- .../research_projects/mm-imdb/run_mmimdb.py | 3 +- .../movement-pruning/counts_parameters.py | 1 + .../emmental/configuration_bert_masked.py | 3 +- .../emmental/modeling_bert_masked.py | 1 - .../movement-pruning/masked_run_glue.py | 2 +- .../movement-pruning/masked_run_squad.py | 3 +- .../onnx/summarization/run_onnx_exporter.py | 3 +- .../performer/run_mlm_performer.py | 1 + .../evaluate-hf-trt-qa.py | 3 +- .../quantization-qdqbert/quant_trainer.py | 1 + .../quantization-qdqbert/utils_qa.py | 1 + .../rag-end2end-retriever/eval_rag.py | 2 +- examples/research_projects/rag/eval_rag.py | 2 +- .../run_speech_recognition_ctc_bnb.py | 2 +- .../run_speech_recognition_ctc_streaming.py | 2 +- .../visual_bert/modeling_frcnn.py | 27 ++-- .../visual_bert/processing_image.py | 27 ++-- .../research_projects/visual_bert/utils.py | 26 ++-- .../visual_bert/visualizing_image.py | 27 ++-- .../xtreme-s/run_xtreme_s.py | 2 +- .../tensorflow/benchmarking/plot_csv_file.py | 12 +- .../benchmarking/run_benchmark_tf.py | 2 +- .../tensorflow/question-answering/utils_qa.py | 1 + .../text-classification/run_glue.py | 2 +- .../run_text_classification.py | 2 +- setup.py | 2 +- src/transformers/audio_utils.py | 1 + src/transformers/benchmark/benchmark.py | 3 +- src/transformers/benchmark/benchmark_tf.py | 3 +- src/transformers/benchmark/benchmark_utils.py | 1 - src/transformers/configuration_utils.py | 3 +- src/transformers/convert_graph_to_onnx.py | 8 +- .../convert_pytorch_checkpoint_to_tf2.py | 3 +- ...ert_slow_tokenizers_checkpoints_to_fast.py | 2 +- ...nvert_tf_hub_seq_to_seq_bert_to_pytorch.py | 1 - .../data/metrics/squad_metrics.py | 1 - src/transformers/data/processors/glue.py | 2 +- src/transformers/data/processors/xnli.py | 3 +- src/transformers/deepspeed.py | 1 + src/transformers/dependency_versions_table.py | 2 +- src/transformers/dynamic_module_utils.py | 1 + .../feature_extraction_sequence_utils.py | 3 +- src/transformers/feature_extraction_utils.py | 2 +- .../generation/configuration_utils.py | 2 +- .../generation/stopping_criteria.py | 6 +- src/transformers/generation/watermarking.py | 1 - src/transformers/integrations/aqlm.py | 1 - src/transformers/integrations/awq.py | 1 + src/transformers/integrations/deepspeed.py | 1 + src/transformers/integrations/ggml.py | 1 + .../integrations/integration_utils.py | 1 + src/transformers/modelcard.py | 3 +- .../modeling_flax_pytorch_utils.py | 3 +- src/transformers/modeling_tf_pytorch_utils.py | 3 +- .../models/albert/configuration_albert.py | 3 +- ...lbert_original_tf_checkpoint_to_pytorch.py | 1 - .../models/albert/modeling_tf_albert.py | 3 +- .../models/albert/tokenization_albert.py | 3 +- .../models/albert/tokenization_albert_fast.py | 3 +- .../models/align/configuration_align.py | 2 +- .../models/align/modeling_align.py | 2 +- .../models/align/processing_align.py | 1 - .../models/altclip/configuration_altclip.py | 3 +- .../models/altclip/modeling_altclip.py | 3 +- .../models/altclip/processing_altclip.py | 1 + ...iguration_audio_spectrogram_transformer.py | 3 +- ...trogram_transformer_original_to_pytorch.py | 1 - .../modeling_audio_spectrogram_transformer.py | 2 +- src/transformers/models/auto/auto_factory.py | 1 + .../models/auto/configuration_auto.py | 3 +- .../models/auto/feature_extraction_auto.py | 3 +- .../models/auto/image_processing_auto.py | 3 +- src/transformers/models/auto/modeling_auto.py | 2 +- 
.../models/auto/modeling_flax_auto.py | 3 +- .../models/auto/modeling_tf_auto.py | 3 +- .../models/auto/processing_auto.py | 3 +- .../models/auto/tokenization_auto.py | 2 +- .../autoformer/configuration_autoformer.py | 2 +- .../models/autoformer/modeling_autoformer.py | 2 +- .../models/bark/configuration_bark.py | 2 +- .../models/bark/convert_suno_to_hf.py | 1 + .../bark/generation_configuration_bark.py | 2 +- src/transformers/models/bark/modeling_bark.py | 3 +- .../models/bark/processing_bark.py | 1 + .../models/bart/configuration_bart.py | 3 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - src/transformers/models/bart/modeling_bart.py | 3 +- .../models/bart/modeling_flax_bart.py | 2 +- .../models/bart/modeling_tf_bart.py | 3 +- .../models/barthez/tokenization_barthez.py | 3 +- .../barthez/tokenization_barthez_fast.py | 3 +- .../models/bartpho/tokenization_bartpho.py | 3 +- .../models/beit/configuration_beit.py | 3 +- .../beit/convert_beit_unilm_to_pytorch.py | 1 - src/transformers/models/beit/modeling_beit.py | 3 +- .../models/bert/configuration_bert.py | 3 +- ...bert_original_tf2_checkpoint_to_pytorch.py | 1 + ..._bert_original_tf_checkpoint_to_pytorch.py | 1 - ...ping_original_tf2_checkpoint_to_pytorch.py | 1 + .../models/bert/modeling_tf_bert.py | 3 +- .../models/bert/tokenization_bert.py | 1 - .../configuration_bert_generation.py | 2 +- .../tokenization_bert_generation.py | 3 +- .../tokenization_bert_japanese.py | 1 - .../models/bertweet/tokenization_bertweet.py | 3 +- .../models/big_bird/configuration_big_bird.py | 3 +- ...gbird_original_tf_checkpoint_to_pytorch.py | 1 - .../models/big_bird/modeling_big_bird.py | 11 +- .../models/big_bird/tokenization_big_bird.py | 1 - .../big_bird/tokenization_big_bird_fast.py | 3 +- .../configuration_bigbird_pegasus.py | 2 +- .../modeling_bigbird_pegasus.py | 10 +- .../models/biogpt/configuration_biogpt.py | 2 +- .../models/biogpt/modeling_biogpt.py | 3 +- .../models/biogpt/tokenization_biogpt.py | 1 + .../models/bit/configuration_bit.py | 2 +- .../models/bit/convert_bit_to_pytorch.py | 1 - src/transformers/models/bit/modeling_bit.py | 2 +- .../blenderbot/configuration_blenderbot.py | 2 +- .../models/blenderbot/modeling_blenderbot.py | 3 +- .../blenderbot/modeling_flax_blenderbot.py | 2 +- .../blenderbot/modeling_tf_blenderbot.py | 3 +- .../tokenization_blenderbot_fast.py | 1 + .../configuration_blenderbot_small.py | 2 +- .../modeling_blenderbot_small.py | 3 +- .../modeling_flax_blenderbot_small.py | 3 +- .../modeling_tf_blenderbot_small.py | 3 +- .../tokenization_blenderbot_small_fast.py | 1 + .../models/blip/configuration_blip.py | 2 +- src/transformers/models/blip/modeling_blip.py | 2 +- .../models/blip/modeling_tf_blip.py | 2 +- .../models/blip_2/configuration_blip_2.py | 2 +- .../models/blip_2/modeling_blip_2.py | 2 +- .../models/bloom/configuration_bloom.py | 3 +- ...rt_bloom_original_checkpoint_to_pytorch.py | 1 - .../models/bloom/tokenization_bloom_fast.py | 1 - .../bridgetower/configuration_bridgetower.py | 2 +- .../models/bros/configuration_bros.py | 2 +- src/transformers/models/bros/modeling_bros.py | 3 +- ..._byt5_original_tf_checkpoint_to_pytorch.py | 1 - .../models/byt5/tokenization_byt5.py | 3 +- .../camembert/configuration_camembert.py | 2 +- .../models/camembert/modeling_tf_camembert.py | 3 +- .../camembert/tokenization_camembert.py | 3 +- .../camembert/tokenization_camembert_fast.py | 3 +- .../models/canine/configuration_canine.py | 2 +- ...anine_original_tf_checkpoint_to_pytorch.py | 1 - 
.../models/canine/modeling_canine.py | 3 +- .../configuration_chinese_clip.py | 2 +- .../chinese_clip/modeling_chinese_clip.py | 3 +- .../models/clap/configuration_clap.py | 2 +- .../models/clap/feature_extraction_clap.py | 1 - src/transformers/models/clap/modeling_clap.py | 3 +- .../models/clip/configuration_clip.py | 2 +- src/transformers/models/clip/modeling_clip.py | 3 +- .../models/clip/modeling_tf_clip.py | 3 +- .../models/clip/tokenization_clip_fast.py | 1 - .../models/clipseg/configuration_clipseg.py | 2 +- .../models/clipseg/modeling_clipseg.py | 2 +- .../models/clvp/configuration_clvp.py | 3 +- src/transformers/models/clvp/modeling_clvp.py | 3 +- .../models/clvp/number_normalizer.py | 1 - .../models/clvp/processing_clvp.py | 1 - .../code_llama/tokenization_code_llama.py | 1 + .../models/codegen/configuration_codegen.py | 3 +- .../models/codegen/modeling_codegen.py | 2 +- .../models/codegen/tokenization_codegen.py | 1 - .../codegen/tokenization_codegen_fast.py | 1 - .../models/cohere/configuration_cohere.py | 2 +- .../configuration_conditional_detr.py | 3 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - .../modeling_conditional_detr.py | 3 +- .../models/convbert/configuration_convbert.py | 2 +- .../models/convbert/modeling_convbert.py | 135 +++++++++--------- .../models/convbert/modeling_tf_convbert.py | 3 +- .../models/convbert/tokenization_convbert.py | 1 + .../convbert/tokenization_convbert_fast.py | 1 + .../models/convnext/configuration_convnext.py | 2 +- .../convnext/convert_convnext_to_pytorch.py | 1 - .../models/convnext/modeling_convnext.py | 3 +- .../models/convnext/modeling_tf_convnext.py | 3 +- .../convnextv2/configuration_convnextv2.py | 3 +- .../models/convnextv2/modeling_convnextv2.py | 3 +- .../convnextv2/modeling_tf_convnextv2.py | 3 +- .../models/cpm/tokenization_cpm.py | 1 + .../models/cpm/tokenization_cpm_fast.py | 1 + .../models/cpmant/configuration_cpmant.py | 2 +- .../models/cpmant/modeling_cpmant.py | 3 +- .../models/cpmant/tokenization_cpmant.py | 1 + .../models/ctrl/configuration_ctrl.py | 2 +- src/transformers/models/ctrl/modeling_ctrl.py | 2 +- .../models/ctrl/modeling_tf_ctrl.py | 2 +- .../models/ctrl/tokenization_ctrl.py | 1 - .../models/cvt/configuration_cvt.py | 2 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - src/transformers/models/cvt/modeling_cvt.py | 3 +- .../models/cvt/modeling_tf_cvt.py | 3 +- .../data2vec/configuration_data2vec_audio.py | 2 +- .../data2vec/configuration_data2vec_text.py | 3 +- .../data2vec/configuration_data2vec_vision.py | 3 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - ..._original_pytorch_checkpoint_to_pytorch.py | 1 - .../data2vec/modeling_data2vec_audio.py | 2 +- .../data2vec/modeling_data2vec_vision.py | 3 +- .../data2vec/modeling_tf_data2vec_vision.py | 3 +- .../models/dbrx/configuration_dbrx.py | 2 +- src/transformers/models/dbrx/modeling_dbrx.py | 2 +- .../models/deberta/configuration_deberta.py | 3 +- .../models/deberta/modeling_deberta.py | 2 +- .../models/deberta/modeling_tf_deberta.py | 3 +- .../models/deberta/tokenization_deberta.py | 2 +- .../deberta/tokenization_deberta_fast.py | 2 +- .../deberta_v2/configuration_deberta_v2.py | 3 +- .../models/deberta_v2/modeling_deberta_v2.py | 2 +- .../deberta_v2/modeling_tf_deberta_v2.py | 2 +- .../deberta_v2/tokenization_deberta_v2.py | 2 +- .../configuration_decision_transformer.py | 2 +- .../modeling_decision_transformer.py | 2 +- .../configuration_deformable_detr.py | 2 +- .../convert_deformable_detr_to_pytorch.py | 1 - 
.../models/deformable_detr/load_custom.py | 3 +- .../modeling_deformable_detr.py | 3 +- .../models/deit/configuration_deit.py | 2 +- .../deit/convert_deit_timm_to_pytorch.py | 1 - src/transformers/models/deit/modeling_deit.py | 3 +- .../models/deit/modeling_tf_deit.py | 3 +- ...original_gluonnlp_checkpoint_to_pytorch.py | 1 - .../models/deprecated/mctct/modeling_mctct.py | 3 +- .../deprecated/mctct/processing_mctct.py | 1 + .../deprecated/mmbt/configuration_mmbt.py | 2 +- .../models/deprecated/mmbt/modeling_mmbt.py | 1 - .../open_llama/configuration_open_llama.py | 2 +- .../open_llama/modeling_open_llama.py | 3 +- .../retribert/configuration_retribert.py | 2 +- .../retribert/modeling_retribert.py | 1 - .../configuration_trajectory_transformer.py | 2 +- ..._original_pytorch_checkpoint_to_pytorch.py | 2 +- .../modeling_trajectory_transformer.py | 2 +- .../transfo_xl/configuration_transfo_xl.py | 2 +- ...fo_xl_original_tf_checkpoint_to_pytorch.py | 1 - .../transfo_xl/modeling_tf_transfo_xl.py | 2 +- .../modeling_tf_transfo_xl_utilities.py | 3 +- .../transfo_xl/modeling_transfo_xl.py | 5 +- .../modeling_transfo_xl_utilities.py | 3 +- .../transfo_xl/tokenization_transfo_xl.py | 3 +- .../deprecated/van/configuration_van.py | 2 +- .../deprecated/van/convert_van_to_pytorch.py | 1 - .../models/deprecated/van/modeling_van.py | 2 +- .../configuration_depth_anything.py | 2 +- .../convert_depth_anything_to_hf.py | 1 - .../depth_anything/modeling_depth_anything.py | 3 +- .../models/deta/configuration_deta.py | 3 +- .../deta/convert_deta_resnet_to_pytorch.py | 1 - .../deta/convert_deta_swin_to_pytorch.py | 1 - src/transformers/models/deta/modeling_deta.py | 3 +- .../models/detr/configuration_detr.py | 2 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - .../models/detr/convert_detr_to_pytorch.py | 1 - src/transformers/models/detr/modeling_detr.py | 3 +- .../models/dinat/configuration_dinat.py | 2 +- .../models/dinat/modeling_dinat.py | 3 +- .../models/dinov2/configuration_dinov2.py | 2 +- .../models/dinov2/convert_dinov2_to_hf.py | 1 - .../models/dinov2/modeling_dinov2.py | 3 +- .../distilbert/configuration_distilbert.py | 3 +- .../models/distilbert/modeling_distilbert.py | 5 +- .../distilbert/modeling_tf_distilbert.py | 3 +- .../dit/convert_dit_unilm_to_pytorch.py | 1 - .../models/donut/configuration_donut_swin.py | 2 +- .../models/donut/convert_donut_to_pytorch.py | 24 ++-- .../models/donut/modeling_donut_swin.py | 2 +- .../models/donut/processing_donut.py | 1 + .../models/dpr/configuration_dpr.py | 2 +- src/transformers/models/dpr/modeling_dpr.py | 3 +- .../models/dpr/modeling_tf_dpr.py | 2 +- .../models/dpr/tokenization_dpr.py | 1 - .../models/dpr/tokenization_dpr_fast.py | 1 - .../models/dpt/configuration_dpt.py | 2 +- .../models/dpt/convert_dinov2_depth_to_hf.py | 1 - .../models/dpt/convert_dpt_beit_to_hf.py | 1 - .../dpt/convert_dpt_hybrid_to_pytorch.py | 1 - .../models/dpt/convert_dpt_swinv2_to_hf.py | 1 - .../models/dpt/convert_dpt_to_pytorch.py | 1 - src/transformers/models/dpt/modeling_dpt.py | 3 +- .../configuration_efficientformer.py | 2 +- .../modeling_efficientformer.py | 2 +- .../modeling_tf_efficientformer.py | 2 +- .../configuration_efficientnet.py | 2 +- .../efficientnet/modeling_efficientnet.py | 3 +- .../models/electra/configuration_electra.py | 2 +- ...ectra_original_tf_checkpoint_to_pytorch.py | 1 - .../models/electra/modeling_tf_electra.py | 3 +- .../models/encodec/configuration_encodec.py | 3 +- .../models/encodec/modeling_encodec.py | 2 +- .../modeling_encoder_decoder.py | 3 
+- .../modeling_flax_encoder_decoder.py | 3 +- .../modeling_tf_encoder_decoder.py | 3 +- .../models/ernie/configuration_ernie.py | 3 +- .../models/ernie/modeling_ernie.py | 1 - .../models/ernie_m/configuration_ernie_m.py | 2 +- .../models/ernie_m/modeling_ernie_m.py | 3 +- .../models/esm/configuration_esm.py | 2 +- src/transformers/models/esm/convert_esm.py | 1 - src/transformers/models/esm/modeling_esm.py | 2 +- .../models/esm/modeling_tf_esm.py | 3 +- .../models/esm/openfold_utils/feats.py | 6 +- .../models/esm/openfold_utils/protein.py | 1 + .../models/esm/openfold_utils/tensor_utils.py | 12 +- .../models/esm/tokenization_esm.py | 1 + .../configuration_fastspeech2_conformer.py | 2 +- .../modeling_fastspeech2_conformer.py | 2 +- .../tokenization_fastspeech2_conformer.py | 1 + .../models/flaubert/configuration_flaubert.py | 3 +- .../models/flaubert/modeling_flaubert.py | 2 +- .../models/flaubert/modeling_tf_flaubert.py | 3 +- .../models/flaubert/tokenization_flaubert.py | 1 - .../models/flava/configuration_flava.py | 2 +- .../models/flava/modeling_flava.py | 2 +- .../models/fnet/configuration_fnet.py | 2 +- ...net_original_flax_checkpoint_to_pytorch.py | 1 - src/transformers/models/fnet/modeling_fnet.py | 2 +- .../models/fnet/tokenization_fnet.py | 2 +- .../models/fnet/tokenization_fnet_fast.py | 3 +- .../models/focalnet/configuration_focalnet.py | 2 +- .../models/focalnet/modeling_focalnet.py | 3 +- .../models/fsmt/configuration_fsmt.py | 3 +- .../models/fsmt/tokenization_fsmt.py | 1 - .../models/funnel/configuration_funnel.py | 2 +- ...unnel_original_tf_checkpoint_to_pytorch.py | 1 - .../models/funnel/modeling_funnel.py | 2 +- .../models/funnel/modeling_tf_funnel.py | 3 +- .../models/funnel/tokenization_funnel.py | 2 +- .../models/funnel/tokenization_funnel_fast.py | 2 +- .../models/fuyu/configuration_fuyu.py | 2 +- src/transformers/models/fuyu/modeling_fuyu.py | 3 +- .../models/fuyu/processing_fuyu.py | 1 + .../models/gemma/configuration_gemma.py | 2 +- .../models/gemma/modeling_flax_gemma.py | 1 + .../models/gemma/modeling_gemma.py | 2 +- .../models/gemma/tokenization_gemma.py | 1 + .../models/git/convert_git_to_pytorch.py | 1 - src/transformers/models/git/modeling_git.py | 1 - .../models/glpn/configuration_glpn.py | 2 +- .../models/glpn/convert_glpn_to_pytorch.py | 1 - src/transformers/models/glpn/modeling_glpn.py | 3 +- .../models/gpt2/configuration_gpt2.py | 3 +- ..._gpt2_original_tf_checkpoint_to_pytorch.py | 1 - .../models/gpt2/modeling_tf_gpt2.py | 2 +- .../models/gpt2/tokenization_gpt2.py | 1 - .../models/gpt2/tokenization_gpt2_fast.py | 1 - .../gpt_bigcode/configuration_gpt_bigcode.py | 2 +- .../gpt_bigcode/modeling_gpt_bigcode.py | 1 + .../models/gpt_neo/configuration_gpt_neo.py | 2 +- .../convert_gpt_neo_mesh_tf_to_pytorch.py | 1 - .../models/gpt_neo/modeling_gpt_neo.py | 3 +- .../models/gpt_neox/configuration_gpt_neox.py | 2 +- .../models/gpt_neox/modeling_gpt_neox.py | 2 +- .../gpt_neox/tokenization_gpt_neox_fast.py | 1 + .../configuration_gpt_neox_japanese.py | 2 +- .../modeling_gpt_neox_japanese.py | 2 +- .../tokenization_gpt_neox_japanese.py | 1 + .../gpt_sw3/convert_megatron_to_pytorch.py | 2 +- .../models/gptj/configuration_gptj.py | 3 +- src/transformers/models/gptj/modeling_gptj.py | 2 +- .../models/gptj/modeling_tf_gptj.py | 2 +- .../configuration_gptsan_japanese.py | 3 +- .../modeling_gptsan_japanese.py | 3 +- .../tokenization_gptsan_japanese.py | 1 + .../graphormer/configuration_graphormer.py | 2 +- .../models/graphormer/modeling_graphormer.py | 2 +- 
.../configuration_grounding_dino.py | 2 +- .../grounding_dino/modeling_grounding_dino.py | 2 +- .../models/groupvit/configuration_groupvit.py | 2 +- .../models/groupvit/modeling_groupvit.py | 3 +- .../models/groupvit/modeling_tf_groupvit.py | 3 +- .../models/hubert/configuration_hubert.py | 2 +- ...rt_original_s3prl_checkpoint_to_pytorch.py | 1 - ..._original_pytorch_checkpoint_to_pytorch.py | 1 - ...rt_original_s3prl_checkpoint_to_pytorch.py | 1 - .../models/hubert/modeling_hubert.py | 2 +- .../models/hubert/modeling_tf_hubert.py | 2 +- .../models/ibert/configuration_ibert.py | 3 +- .../models/idefics/configuration_idefics.py | 2 +- .../models/idefics/modeling_idefics.py | 3 +- .../models/idefics/modeling_tf_idefics.py | 2 +- src/transformers/models/idefics/perceiver.py | 1 + .../models/idefics/perceiver_tf.py | 1 + src/transformers/models/idefics/vision.py | 3 +- src/transformers/models/idefics/vision_tf.py | 3 +- .../models/imagegpt/configuration_imagegpt.py | 2 +- ...onvert_imagegpt_original_tf2_to_pytorch.py | 1 - .../models/informer/modeling_informer.py | 2 +- .../configuration_instructblip.py | 2 +- .../instructblip/modeling_instructblip.py | 2 +- .../models/jamba/configuration_jamba.py | 3 +- .../models/jamba/modeling_jamba.py | 3 +- .../models/jukebox/configuration_jukebox.py | 2 +- .../models/jukebox/tokenization_jukebox.py | 1 - .../models/kosmos2/configuration_kosmos2.py | 2 +- .../models/kosmos2/modeling_kosmos2.py | 3 +- .../models/layoutlm/configuration_layoutlm.py | 3 +- .../models/layoutlm/modeling_layoutlm.py | 3 +- .../models/layoutlm/modeling_tf_layoutlm.py | 3 +- .../models/layoutlm/tokenization_layoutlm.py | 2 +- .../layoutlm/tokenization_layoutlm_fast.py | 2 +- .../layoutlmv2/configuration_layoutlmv2.py | 2 +- .../models/layoutlmv2/modeling_layoutlmv2.py | 2 +- .../layoutlmv3/configuration_layoutlmv3.py | 2 +- .../layoutlmv3/modeling_tf_layoutlmv3.py | 1 - .../models/layoutxlm/processing_layoutxlm.py | 1 + .../layoutxlm/tokenization_layoutxlm.py | 3 +- .../layoutxlm/tokenization_layoutxlm_fast.py | 3 +- .../models/led/configuration_led.py | 2 +- src/transformers/models/led/modeling_led.py | 3 +- .../models/led/modeling_tf_led.py | 3 +- .../models/levit/configuration_levit.py | 2 +- .../levit/convert_levit_timm_to_pytorch.py | 1 - .../models/levit/modeling_levit.py | 2 +- .../models/lilt/configuration_lilt.py | 2 +- .../models/llama/configuration_llama.py | 2 +- .../models/llama/modeling_flax_llama.py | 1 + .../models/llama/tokenization_llama.py | 1 + .../models/llava/configuration_llava.py | 2 +- .../models/llava/processing_llava.py | 1 - .../llava_next/configuration_llava_next.py | 2 +- .../longformer/configuration_longformer.py | 3 +- ...r_original_pytorch_lightning_to_pytorch.py | 1 - .../longformer/modeling_tf_longformer.py | 1 - .../tokenization_longformer_fast.py | 1 + .../models/longt5/configuration_longt5.py | 3 +- .../convert_longt5x_checkpoint_to_flax.py | 6 +- .../models/longt5/modeling_flax_longt5.py | 3 +- .../models/longt5/modeling_longt5.py | 3 +- .../models/luke/configuration_luke.py | 2 +- .../models/lxmert/configuration_lxmert.py | 3 +- ...xmert_original_tf_checkpoint_to_pytorch.py | 1 - .../models/lxmert/modeling_lxmert.py | 3 +- .../models/lxmert/modeling_tf_lxmert.py | 3 +- .../models/m2m_100/configuration_m2m_100.py | 3 +- .../models/m2m_100/tokenization_m2m_100.py | 1 + .../models/marian/configuration_marian.py | 3 +- .../models/marian/modeling_flax_marian.py | 2 +- .../models/marian/modeling_marian.py | 1 - 
.../models/marian/modeling_tf_marian.py | 3 +- .../models/markuplm/configuration_markuplm.py | 2 +- .../models/markuplm/modeling_markuplm.py | 2 +- .../models/markuplm/processing_markuplm.py | 1 + .../mask2former/configuration_mask2former.py | 3 +- .../mask2former/modeling_mask2former.py | 2 +- .../maskformer/configuration_maskformer.py | 3 +- .../configuration_maskformer_swin.py | 2 +- .../convert_maskformer_resnet_to_pytorch.py | 1 - .../convert_maskformer_swin_to_pytorch.py | 1 - .../models/maskformer/modeling_maskformer.py | 2 +- .../models/mbart/configuration_mbart.py | 3 +- .../models/mbart/modeling_flax_mbart.py | 2 +- .../models/mbart/modeling_mbart.py | 3 +- .../models/mbart/modeling_tf_mbart.py | 3 +- .../models/mega/configuration_mega.py | 3 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 + .../configuration_megatron_bert.py | 2 +- .../megatron_bert/modeling_megatron_bert.py | 3 +- .../models/mgp_str/configuration_mgp_str.py | 2 +- .../models/mgp_str/modeling_mgp_str.py | 2 +- .../models/mistral/configuration_mistral.py | 2 +- .../models/mistral/modeling_flax_mistral.py | 3 +- .../models/mixtral/configuration_mixtral.py | 2 +- .../models/mixtral/modeling_mixtral.py | 3 +- .../models/mluke/tokenization_mluke.py | 3 +- .../mobilebert/configuration_mobilebert.py | 3 +- .../mobilebert/modeling_tf_mobilebert.py | 3 +- .../mobilebert/tokenization_mobilebert.py | 1 - .../configuration_mobilenet_v1.py | 2 +- ...nvert_original_tf_checkpoint_to_pytorch.py | 1 - .../mobilenet_v1/modeling_mobilenet_v1.py | 3 +- .../configuration_mobilenet_v2.py | 2 +- ...nvert_original_tf_checkpoint_to_pytorch.py | 1 - .../mobilenet_v2/modeling_mobilenet_v2.py | 27 ++-- .../mobilevit/configuration_mobilevit.py | 2 +- .../mobilevit/convert_mlcvnets_to_pytorch.py | 1 - .../models/mobilevit/modeling_mobilevit.py | 3 +- .../models/mobilevit/modeling_tf_mobilevit.py | 2 +- .../mobilevitv2/configuration_mobilevitv2.py | 2 +- .../convert_mlcvnets_to_pytorch.py | 1 - .../mobilevitv2/modeling_mobilevitv2.py | 3 +- .../models/mpnet/configuration_mpnet.py | 2 +- .../models/mpnet/modeling_mpnet.py | 1 - .../models/mpnet/modeling_tf_mpnet.py | 3 +- .../models/mpt/configuration_mpt.py | 3 +- .../models/mra/configuration_mra.py | 2 +- src/transformers/models/mra/modeling_mra.py | 3 +- .../models/mt5/configuration_mt5.py | 3 +- .../models/mt5/modeling_flax_mt5.py | 2 +- src/transformers/models/mt5/modeling_mt5.py | 2 +- .../models/mt5/modeling_tf_mt5.py | 2 +- .../models/musicgen/configuration_musicgen.py | 2 +- .../musicgen/convert_musicgen_transformers.py | 1 + .../models/musicgen/modeling_musicgen.py | 3 +- .../models/musicgen/processing_musicgen.py | 1 + .../configuration_musicgen_melody.py | 2 +- .../convert_musicgen_melody_transformers.py | 1 + .../feature_extraction_musicgen_melody.py | 1 + .../modeling_musicgen_melody.py | 3 +- .../processing_musicgen_melody.py | 1 + .../models/mvp/configuration_mvp.py | 3 +- src/transformers/models/mvp/modeling_mvp.py | 3 +- .../models/nat/configuration_nat.py | 2 +- src/transformers/models/nat/modeling_nat.py | 3 +- .../models/nezha/modeling_nezha.py | 1 - .../models/nllb_moe/configuration_nllb_moe.py | 3 +- .../models/nllb_moe/modeling_nllb_moe.py | 3 +- .../models/nougat/convert_nougat_to_hf.py | 24 ++-- .../models/nougat/tokenization_nougat_fast.py | 1 + .../configuration_nystromformer.py | 2 +- .../nystromformer/modeling_nystromformer.py | 3 +- .../models/olmo/configuration_olmo.py | 2 +- .../oneformer/configuration_oneformer.py | 1 + 
.../models/oneformer/modeling_oneformer.py | 3 +- .../models/openai/configuration_openai.py | 2 +- ...penai_original_tf_checkpoint_to_pytorch.py | 1 - .../models/openai/modeling_openai.py | 1 - .../models/openai/modeling_tf_openai.py | 2 +- .../models/openai/tokenization_openai.py | 1 - .../models/openai/tokenization_openai_fast.py | 1 - .../models/opt/configuration_opt.py | 3 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - .../models/opt/modeling_flax_opt.py | 2 +- src/transformers/models/opt/modeling_opt.py | 3 +- .../models/opt/modeling_tf_opt.py | 3 +- .../models/owlv2/configuration_owlv2.py | 2 +- .../models/owlv2/modeling_owlv2.py | 2 +- .../models/owlvit/configuration_owlvit.py | 2 +- .../models/owlvit/modeling_owlvit.py | 2 +- .../paligemma/configuration_paligemma.py | 2 +- .../convert_paligemma_weights_to_hf.py | 4 +- .../models/paligemma/modeling_paligemma.py | 3 +- .../models/paligemma/processing_paligemma.py | 1 - .../configuration_patchtsmixer.py | 2 +- .../patchtsmixer/modeling_patchtsmixer.py | 2 +- .../models/patchtst/modeling_patchtst.py | 2 +- .../models/pegasus/configuration_pegasus.py | 2 +- .../models/pegasus/modeling_flax_pegasus.py | 3 +- .../models/pegasus/modeling_pegasus.py | 2 +- .../models/pegasus/modeling_tf_pegasus.py | 3 +- .../pegasus/tokenization_pegasus_fast.py | 3 +- .../pegasus_x/configuration_pegasus_x.py | 2 +- .../models/pegasus_x/modeling_pegasus_x.py | 2 +- .../perceiver/configuration_perceiver.py | 2 +- .../convert_perceiver_haiku_to_pytorch.py | 1 - .../models/perceiver/modeling_perceiver.py | 2 +- .../perceiver/tokenization_perceiver.py | 3 +- .../persimmon/configuration_persimmon.py | 2 +- .../models/persimmon/modeling_persimmon.py | 3 +- .../models/phi/configuration_phi.py | 3 +- src/transformers/models/phi/modeling_phi.py | 3 +- .../models/phi3/configuration_phi3.py | 3 +- src/transformers/models/phi3/modeling_phi3.py | 2 +- .../models/phobert/tokenization_phobert.py | 3 +- .../pix2struct/configuration_pix2struct.py | 2 +- .../pix2struct/image_processing_pix2struct.py | 1 + .../models/pix2struct/modeling_pix2struct.py | 2 +- .../models/plbart/configuration_plbart.py | 3 +- .../models/plbart/modeling_plbart.py | 3 +- .../poolformer/configuration_poolformer.py | 3 +- .../models/poolformer/modeling_poolformer.py | 3 +- .../pop2piano/configuration_pop2piano.py | 3 +- .../convert_pop2piano_weights_to_hf.py | 4 +- .../pop2piano/feature_extraction_pop2piano.py | 2 +- .../models/pop2piano/modeling_pop2piano.py | 3 +- .../models/pop2piano/processing_pop2piano.py | 2 +- .../prophetnet/configuration_prophetnet.py | 2 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - .../models/prophetnet/modeling_prophetnet.py | 2 +- .../models/pvt/configuration_pvt.py | 2 +- .../models/pvt/convert_pvt_to_pytorch.py | 1 - src/transformers/models/pvt/modeling_pvt.py | 2 +- .../models/qdqbert/configuration_qdqbert.py | 2 +- .../models/qdqbert/modeling_qdqbert.py | 3 +- .../models/qwen2/configuration_qwen2.py | 2 +- .../models/qwen2/modeling_qwen2.py | 3 +- .../qwen2_moe/configuration_qwen2_moe.py | 2 +- .../models/rag/configuration_rag.py | 3 +- .../models/rag/modeling_tf_rag.py | 1 - .../models/rag/tokenization_rag.py | 1 + .../models/realm/configuration_realm.py | 2 +- .../models/realm/modeling_realm.py | 2 +- .../configuration_recurrent_gemma.py | 2 +- .../modeling_recurrent_gemma.py | 2 +- .../models/reformer/configuration_reformer.py | 2 +- ...ert_reformer_trax_checkpoint_to_pytorch.py | 1 - .../models/reformer/tokenization_reformer.py | 3 +- 
.../reformer/tokenization_reformer_fast.py | 3 +- .../models/regnet/configuration_regnet.py | 2 +- .../regnet/convert_regnet_to_pytorch.py | 1 - .../models/regnet/modeling_regnet.py | 2 +- .../models/regnet/modeling_tf_regnet.py | 2 +- .../models/rembert/configuration_rembert.py | 3 +- ...onvert_rembert_tf_checkpoint_to_pytorch.py | 1 - .../models/rembert/modeling_rembert.py | 3 +- .../models/rembert/modeling_tf_rembert.py | 3 +- .../models/rembert/tokenization_rembert.py | 1 - .../rembert/tokenization_rembert_fast.py | 3 +- .../models/resnet/configuration_resnet.py | 2 +- .../resnet/convert_resnet_to_pytorch.py | 1 - .../models/resnet/modeling_resnet.py | 2 +- .../models/resnet/modeling_tf_resnet.py | 2 +- .../models/roberta/configuration_roberta.py | 3 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - .../models/roberta/modeling_tf_roberta.py | 3 +- .../roberta/tokenization_roberta_fast.py | 1 + .../configuration_roberta_prelayernorm.py | 3 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - .../modeling_flax_roberta_prelayernorm.py | 3 +- .../modeling_tf_roberta_prelayernorm.py | 3 +- .../models/roc_bert/configuration_roc_bert.py | 2 +- .../models/roc_bert/modeling_roc_bert.py | 2 +- .../models/roformer/configuration_roformer.py | 2 +- ...ormer_original_tf_checkpoint_to_pytorch.py | 1 - .../models/roformer/modeling_flax_roformer.py | 2 +- .../models/roformer/modeling_roformer.py | 3 +- .../models/roformer/modeling_tf_roformer.py | 3 +- .../roformer/tokenization_roformer_fast.py | 1 + .../models/rwkv/configuration_rwkv.py | 2 +- .../rwkv/convert_rwkv_checkpoint_to_hf.py | 1 - .../models/sam/configuration_sam.py | 3 +- .../models/sam/convert_sam_to_hf.py | 1 + .../models/sam/image_processing_sam.py | 1 + src/transformers/models/sam/modeling_sam.py | 2 +- .../models/sam/modeling_tf_sam.py | 1 - src/transformers/models/sam/processing_sam.py | 1 + .../configuration_seamless_m4t.py | 2 +- .../seamless_m4t/convert_fairseq2_to_hf.py | 3 +- .../seamless_m4t/modeling_seamless_m4t.py | 3 +- .../seamless_m4t/tokenization_seamless_m4t.py | 1 + .../tokenization_seamless_m4t_fast.py | 1 + .../configuration_seamless_m4t_v2.py | 2 +- .../seamless_m4t_v2/convert_fairseq2_to_hf.py | 3 +- .../modeling_seamless_m4t_v2.py | 3 +- .../segformer/configuration_segformer.py | 2 +- .../convert_segformer_original_to_pytorch.py | 1 - .../models/segformer/modeling_segformer.py | 3 +- .../models/segformer/modeling_tf_segformer.py | 3 +- .../models/seggpt/configuration_seggpt.py | 3 +- .../models/seggpt/convert_seggpt_to_hf.py | 1 - .../models/seggpt/modeling_seggpt.py | 3 +- .../models/sew/configuration_sew.py | 2 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - src/transformers/models/sew/modeling_sew.py | 2 +- .../models/sew_d/configuration_sew_d.py | 2 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - .../models/sew_d/modeling_sew_d.py | 2 +- .../models/siglip/configuration_siglip.py | 2 +- .../models/siglip/convert_siglip_to_hf.py | 1 - .../models/siglip/modeling_siglip.py | 3 +- .../models/siglip/tokenization_siglip.py | 2 +- ...rt_wav2vec2_seq2seq_original_to_pytorch.py | 1 - ...xt_wav2vec2_seq2seq_original_to_pytorch.py | 1 - .../modeling_flax_speech_encoder_decoder.py | 2 +- .../modeling_speech_encoder_decoder.py | 3 +- .../configuration_speech_to_text.py | 2 +- .../speech_to_text/modeling_speech_to_text.py | 2 +- .../modeling_tf_speech_to_text.py | 3 +- .../processing_speech_to_text.py | 1 + .../tokenization_speech_to_text.py | 1 + .../configuration_speech_to_text_2.py | 2 +- 
.../modeling_speech_to_text_2.py | 3 +- .../processing_speech_to_text_2.py | 1 + .../models/speecht5/configuration_speecht5.py | 2 +- .../models/speecht5/modeling_speecht5.py | 2 +- .../models/speecht5/tokenization_speecht5.py | 1 - .../models/splinter/configuration_splinter.py | 2 +- .../models/splinter/modeling_splinter.py | 3 +- .../squeezebert/configuration_squeezebert.py | 3 +- .../squeezebert/modeling_squeezebert.py | 3 +- .../models/stablelm/configuration_stablelm.py | 2 +- .../models/stablelm/modeling_stablelm.py | 3 +- .../starcoder2/configuration_starcoder2.py | 2 +- .../models/starcoder2/modeling_starcoder2.py | 3 +- .../models/superpoint/modeling_superpoint.py | 1 + .../swiftformer/configuration_swiftformer.py | 2 +- .../convert_swiftformer_original_to_hf.py | 1 - .../swiftformer/modeling_swiftformer.py | 3 +- .../swiftformer/modeling_tf_swiftformer.py | 3 +- .../models/swin/configuration_swin.py | 2 +- .../swin/convert_swin_simmim_to_pytorch.py | 12 +- .../swin/convert_swin_timm_to_pytorch.py | 12 +- src/transformers/models/swin/modeling_swin.py | 3 +- .../models/swin/modeling_tf_swin.py | 3 +- .../models/swin2sr/configuration_swin2sr.py | 2 +- .../convert_swin2sr_original_to_pytorch.py | 24 ++-- .../models/swin2sr/modeling_swin2sr.py | 3 +- .../models/swinv2/configuration_swinv2.py | 2 +- .../swinv2/convert_swinv2_timm_to_pytorch.py | 18 +-- .../models/swinv2/modeling_swinv2.py | 3 +- .../configuration_switch_transformers.py | 3 +- .../modeling_switch_transformers.py | 3 +- .../models/t5/configuration_t5.py | 3 +- ...rt_t5_original_tf_checkpoint_to_pytorch.py | 1 - .../t5/convert_t5x_checkpoint_to_flax.py | 126 ++++++++-------- .../models/t5/modeling_flax_t5.py | 3 +- src/transformers/models/t5/modeling_t5.py | 3 +- src/transformers/models/t5/modeling_tf_t5.py | 3 +- src/transformers/models/t5/tokenization_t5.py | 3 +- .../models/t5/tokenization_t5_fast.py | 3 +- .../configuration_table_transformer.py | 3 +- .../convert_table_transformer_to_hf.py | 1 - ...convert_table_transformer_to_hf_no_timm.py | 1 - .../modeling_table_transformer.py | 3 +- .../models/tapas/configuration_tapas.py | 1 - ...tapas_original_tf_checkpoint_to_pytorch.py | 1 - .../models/tapas/modeling_tapas.py | 1 - .../models/tapas/modeling_tf_tapas.py | 1 - .../models/tapas/tokenization_tapas.py | 3 +- .../configuration_time_series_transformer.py | 2 +- .../modeling_time_series_transformer.py | 2 +- .../timesformer/configuration_timesformer.py | 2 +- .../timesformer/modeling_timesformer.py | 3 +- .../configuration_timm_backbone.py | 2 +- .../models/trocr/configuration_trocr.py | 2 +- .../trocr/convert_trocr_unilm_to_pytorch.py | 1 - .../models/trocr/modeling_trocr.py | 3 +- .../models/trocr/processing_trocr.py | 1 + .../models/tvlt/configuration_tvlt.py | 2 +- .../models/tvlt/image_processing_tvlt.py | 1 + src/transformers/models/tvlt/modeling_tvlt.py | 3 +- .../models/tvp/configuration_tvp.py | 2 +- src/transformers/models/tvp/processing_tvp.py | 1 - .../models/udop/configuration_udop.py | 3 +- .../models/udop/convert_udop_to_hf.py | 1 - src/transformers/models/udop/modeling_udop.py | 2 +- .../models/udop/tokenization_udop.py | 3 +- .../models/udop/tokenization_udop_fast.py | 3 +- .../models/umt5/configuration_umt5.py | 3 +- .../convert_umt5_checkpoint_to_pytorch.py | 6 +- src/transformers/models/umt5/modeling_umt5.py | 2 +- .../unispeech/configuration_unispeech.py | 2 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - .../models/unispeech/modeling_unispeech.py | 2 +- .../configuration_unispeech_sat.py | 
2 +- ...ch_original_s3prl_checkpoint_to_pytorch.py | 1 - ..._original_pytorch_checkpoint_to_pytorch.py | 1 - .../unispeech_sat/modeling_unispeech_sat.py | 2 +- .../models/univnet/configuration_univnet.py | 2 +- .../models/univnet/modeling_univnet.py | 2 +- .../models/upernet/configuration_upernet.py | 3 +- .../models/upernet/modeling_upernet.py | 2 +- .../video_llava/configuration_video_llava.py | 2 +- .../video_llava/modeling_video_llava.py | 3 +- .../video_llava/processing_video_llava.py | 1 - .../models/videomae/configuration_videomae.py | 2 +- .../models/videomae/modeling_videomae.py | 3 +- .../models/vilt/configuration_vilt.py | 2 +- .../vilt/convert_vilt_original_to_pytorch.py | 1 - src/transformers/models/vilt/modeling_vilt.py | 2 +- .../models/vipllava/configuration_vipllava.py | 2 +- .../models/vipllava/modeling_vipllava.py | 3 +- .../modeling_flax_vision_encoder_decoder.py | 3 +- .../modeling_tf_vision_encoder_decoder.py | 3 +- .../modeling_vision_encoder_decoder.py | 3 +- .../configuration_vision_text_dual_encoder.py | 3 +- .../modeling_flax_vision_text_dual_encoder.py | 3 +- .../modeling_tf_vision_text_dual_encoder.py | 1 - .../modeling_vision_text_dual_encoder.py | 3 +- .../visual_bert/configuration_visual_bert.py | 2 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - .../visual_bert/modeling_visual_bert.py | 3 +- .../models/vit/configuration_vit.py | 2 +- .../models/vit/convert_dino_to_pytorch.py | 1 - .../models/vit/convert_vit_timm_to_pytorch.py | 1 - .../models/vit/modeling_tf_vit.py | 3 +- src/transformers/models/vit/modeling_vit.py | 3 +- .../vit_hybrid/configuration_vit_hybrid.py | 3 +- .../convert_vit_hybrid_timm_to_pytorch.py | 1 - .../models/vit_hybrid/modeling_vit_hybrid.py | 3 +- .../models/vit_mae/configuration_vit_mae.py | 2 +- .../models/vit_mae/modeling_tf_vit_mae.py | 3 +- .../models/vit_mae/modeling_vit_mae.py | 3 +- .../models/vit_msn/configuration_vit_msn.py | 3 +- .../models/vit_msn/modeling_vit_msn.py | 3 +- .../models/vitdet/configuration_vitdet.py | 3 +- .../models/vitdet/modeling_vitdet.py | 3 +- .../models/vitmatte/configuration_vitmatte.py | 2 +- .../models/vitmatte/modeling_vitmatte.py | 2 +- .../models/vits/configuration_vits.py | 3 +- src/transformers/models/vits/modeling_vits.py | 2 +- .../models/vits/tokenization_vits.py | 1 - .../models/vivit/configuration_vivit.py | 2 +- .../vivit/convert_vivit_flax_to_pytorch.py | 1 + .../models/vivit/image_processing_vivit.py | 1 + .../models/vivit/modeling_vivit.py | 3 +- .../models/wav2vec2/configuration_wav2vec2.py | 2 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - ...c2_original_s3prl_checkpoint_to_pytorch.py | 1 - .../models/wav2vec2/modeling_flax_wav2vec2.py | 2 +- .../models/wav2vec2/modeling_tf_wav2vec2.py | 3 +- .../models/wav2vec2/modeling_wav2vec2.py | 2 +- .../models/wav2vec2/processing_wav2vec2.py | 1 + .../configuration_wav2vec2_bert.py | 3 +- .../convert_wav2vec2_seamless_checkpoint.py | 1 - .../wav2vec2_bert/modeling_wav2vec2_bert.py | 2 +- .../wav2vec2_bert/processing_wav2vec2_bert.py | 1 + .../configuration_wav2vec2_conformer.py | 2 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - .../modeling_wav2vec2_conformer.py | 2 +- .../tokenization_wav2vec2_phoneme.py | 1 - .../processing_wav2vec2_with_lm.py | 1 + .../models/wavlm/configuration_wavlm.py | 2 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - ...lm_original_s3prl_checkpoint_to_pytorch.py | 1 - .../models/wavlm/modeling_wavlm.py | 2 +- .../models/whisper/configuration_whisper.py | 2 +- 
.../whisper/feature_extraction_whisper.py | 1 + .../models/whisper/modeling_flax_whisper.py | 2 +- .../models/whisper/modeling_tf_whisper.py | 3 +- .../models/whisper/modeling_whisper.py | 3 +- .../models/whisper/processing_whisper.py | 1 - .../models/whisper/tokenization_whisper.py | 1 + .../whisper/tokenization_whisper_fast.py | 1 + .../models/x_clip/configuration_x_clip.py | 2 +- .../models/x_clip/modeling_x_clip.py | 3 +- .../models/xglm/configuration_xglm.py | 2 +- .../models/xglm/modeling_flax_xglm.py | 3 +- .../models/xglm/modeling_tf_xglm.py | 3 +- src/transformers/models/xglm/modeling_xglm.py | 3 +- .../models/xglm/tokenization_xglm.py | 1 + .../models/xlm/configuration_xlm.py | 3 +- ..._original_pytorch_checkpoint_to_pytorch.py | 1 - .../models/xlm/modeling_tf_xlm.py | 3 +- src/transformers/models/xlm/modeling_xlm.py | 2 +- .../models/xlm/tokenization_xlm.py | 5 +- .../configuration_xlm_prophetnet.py | 3 +- .../xlm_prophetnet/modeling_xlm_prophetnet.py | 3 +- .../xlm_roberta/configuration_xlm_roberta.py | 3 +- .../xlm_roberta/modeling_tf_xlm_roberta.py | 3 +- .../xlm_roberta/tokenization_xlm_roberta.py | 3 +- .../tokenization_xlm_roberta_fast.py | 3 +- .../configuration_xlm_roberta_xl.py | 2 +- .../models/xlnet/configuration_xlnet.py | 2 +- ...xlnet_original_tf_checkpoint_to_pytorch.py | 1 - .../models/xlnet/modeling_tf_xlnet.py | 3 +- .../models/xlnet/modeling_xlnet.py | 3 +- .../models/xlnet/tokenization_xlnet.py | 3 +- .../models/xlnet/tokenization_xlnet_fast.py | 3 +- .../models/xmod/configuration_xmod.py | 3 +- .../models/yolos/configuration_yolos.py | 2 +- .../models/yolos/convert_yolos_to_pytorch.py | 1 - .../models/yolos/modeling_yolos.py | 3 +- .../models/yoso/configuration_yoso.py | 2 +- src/transformers/models/yoso/modeling_yoso.py | 3 +- src/transformers/optimization_tf.py | 1 - src/transformers/processing_utils.py | 2 +- src/transformers/quantizers/base.py | 12 +- src/transformers/time_series_utils.py | 1 + src/transformers/tokenization_utils.py | 11 +- src/transformers/tokenization_utils_fast.py | 5 +- src/transformers/trainer_callback.py | 1 + src/transformers/utils/backbone_utils.py | 2 +- src/transformers/utils/fx.py | 6 +- src/transformers/utils/hub.py | 1 + src/transformers/utils/logging.py | 3 +- .../utils/sentencepiece_model_pb2_new.py | 1 + tests/models/align/test_modeling_align.py | 3 +- tests/models/altclip/test_modeling_altclip.py | 3 +- ..._modeling_audio_spectrogram_transformer.py | 2 +- .../autoformer/test_modeling_autoformer.py | 2 +- tests/models/bark/test_modeling_bark.py | 3 +- tests/models/bart/test_modeling_bart.py | 3 +- tests/models/beit/test_modeling_beit.py | 3 +- tests/models/bert/test_tokenization_bert.py | 20 +-- .../models/bert/test_tokenization_bert_tf.py | 2 +- .../models/big_bird/test_modeling_big_bird.py | 3 +- .../test_modeling_bigbird_pegasus.py | 3 +- tests/models/biogpt/test_modeling_biogpt.py | 2 +- tests/models/bit/test_modeling_bit.py | 3 +- .../blenderbot/test_modeling_blenderbot.py | 2 +- .../test_tokenization_blenderbot.py | 1 + .../test_modeling_blenderbot_small.py | 2 +- .../test_tokenization_blenderbot_small.py | 1 + tests/models/blip/test_modeling_blip.py | 3 +- tests/models/blip/test_modeling_blip_text.py | 3 +- tests/models/blip/test_modeling_tf_blip.py | 3 +- .../models/blip/test_modeling_tf_blip_text.py | 3 +- tests/models/blip_2/test_modeling_blip_2.py | 3 +- .../bridgetower/test_modeling_bridgetower.py | 2 +- tests/models/bros/test_modeling_bros.py | 2 +- tests/models/byt5/test_tokenization_byt5.py | 4 +- 
tests/models/canine/test_modeling_canine.py | 3 +- .../models/canine/test_tokenization_canine.py | 4 +- .../test_modeling_chinese_clip.py | 2 +- tests/models/clap/test_modeling_clap.py | 3 +- tests/models/clip/test_modeling_clip.py | 3 +- tests/models/clip/test_modeling_tf_clip.py | 3 +- tests/models/clip/test_tokenization_clip.py | 14 +- tests/models/clipseg/test_modeling_clipseg.py | 3 +- tests/models/clvp/test_modeling_clvp.py | 3 +- tests/models/cohere/test_modeling_cohere.py | 2 +- .../test_modeling_conditional_detr.py | 3 +- .../models/convbert/test_modeling_convbert.py | 3 +- .../models/convnext/test_modeling_convnext.py | 3 +- .../convnext/test_modeling_tf_convnext.py | 2 +- .../convnextv2/test_modeling_convnextv2.py | 3 +- .../convnextv2/test_modeling_tf_convnextv2.py | 2 +- tests/models/cpmant/test_modeling_cpmant.py | 2 +- tests/models/cvt/test_modeling_cvt.py | 3 +- tests/models/cvt/test_modeling_tf_cvt.py | 3 +- .../data2vec/test_modeling_data2vec_audio.py | 2 +- .../data2vec/test_modeling_data2vec_text.py | 2 +- .../data2vec/test_modeling_data2vec_vision.py | 3 +- .../test_modeling_tf_data2vec_vision.py | 2 +- tests/models/dbrx/test_modeling_dbrx.py | 3 +- .../test_modeling_decision_transformer.py | 3 +- .../test_modeling_deformable_detr.py | 3 +- tests/models/deit/test_modeling_deit.py | 3 +- tests/models/deit/test_modeling_tf_deit.py | 3 +- .../test_modeling_depth_anything.py | 3 +- tests/models/deta/test_modeling_deta.py | 3 +- tests/models/detr/test_modeling_detr.py | 3 +- tests/models/dinat/test_modeling_dinat.py | 2 +- tests/models/dinov2/test_modeling_dinov2.py | 3 +- .../models/donut/test_modeling_donut_swin.py | 2 +- tests/models/dpt/test_modeling_dpt.py | 3 +- .../dpt/test_modeling_dpt_auto_backbone.py | 3 +- tests/models/dpt/test_modeling_dpt_hybrid.py | 3 +- .../test_modeling_efficientformer.py | 3 +- .../test_modeling_tf_efficientformer.py | 2 +- .../test_modeling_efficientnet.py | 3 +- .../electra/test_tokenization_electra.py | 20 +-- tests/models/encodec/test_modeling_encodec.py | 2 +- tests/models/ernie_m/test_modeling_ernie_m.py | 3 +- .../ernie_m/test_tokenization_ernie_m.py | 2 +- tests/models/esm/test_modeling_esm.py | 3 +- tests/models/esm/test_modeling_esmfold.py | 3 +- tests/models/falcon/test_modeling_falcon.py | 3 +- .../test_modeling_fastspeech2_conformer.py | 2 +- tests/models/flava/test_modeling_flava.py | 3 +- tests/models/fnet/test_modeling_fnet.py | 3 +- .../models/focalnet/test_modeling_focalnet.py | 2 +- .../models/funnel/test_tokenization_funnel.py | 8 +- tests/models/fuyu/test_modeling_fuyu.py | 2 +- tests/models/gemma/test_modeling_gemma.py | 3 +- tests/models/glpn/test_modeling_glpn.py | 3 +- .../models/gpt2/test_tokenization_gpt2_tf.py | 2 +- tests/models/gpt_neo/test_modeling_gpt_neo.py | 3 +- .../models/gpt_neox/test_modeling_gpt_neox.py | 3 +- .../test_modeling_gpt_neox_japanese.py | 3 +- .../graphormer/test_modeling_graphormer.py | 3 +- .../test_modeling_grounding_dino.py | 2 +- .../models/groupvit/test_modeling_groupvit.py | 3 +- .../groupvit/test_modeling_tf_groupvit.py | 3 +- tests/models/hubert/test_modeling_hubert.py | 3 +- tests/models/idefics/test_modeling_idefics.py | 2 +- .../idefics/test_modeling_tf_idefics.py | 2 +- .../models/informer/test_modeling_informer.py | 2 +- .../test_modeling_instructblip.py | 3 +- tests/models/jamba/test_modeling_jamba.py | 3 +- tests/models/kosmos2/test_modeling_kosmos2.py | 3 +- .../layoutlm/test_tokenization_layoutlm.py | 4 +- .../layoutlmv2/test_modeling_layoutlmv2.py | 3 +- 
.../test_tokenization_layoutlmv2.py | 14 +- .../layoutlmv3/test_modeling_layoutlmv3.py | 2 +- .../layoutlmv3/test_modeling_tf_layoutlmv3.py | 2 +- .../layoutxlm/test_tokenization_layoutxlm.py | 2 +- tests/models/led/test_modeling_led.py | 3 +- tests/models/levit/test_modeling_levit.py | 3 +- .../test_tokenization_longformer.py | 3 +- tests/models/luke/test_modeling_luke.py | 3 +- .../models/lxmert/test_tokenization_lxmert.py | 4 +- tests/models/m2m_100/test_modeling_m2m_100.py | 3 +- tests/models/marian/test_modeling_marian.py | 2 +- .../markuplm/test_tokenization_markuplm.py | 2 +- .../mask2former/test_modeling_mask2former.py | 2 +- .../maskformer/test_modeling_maskformer.py | 2 +- .../test_modeling_maskformer_swin.py | 2 +- tests/models/mbart/test_modeling_mbart.py | 3 +- .../test_modeling_megatron_bert.py | 3 +- tests/models/mgp_str/test_modeling_mgp_str.py | 2 +- .../models/mgp_str/test_processor_mgp_str.py | 2 +- tests/models/mixtral/test_modeling_mixtral.py | 3 +- .../test_tokenization_mobilebert.py | 23 ++- .../test_modeling_mobilenet_v1.py | 3 +- .../test_modeling_mobilenet_v2.py | 3 +- .../mobilevit/test_modeling_mobilevit.py | 3 +- .../mobilevit/test_modeling_tf_mobilevit.py | 3 +- .../mobilevitv2/test_modeling_mobilevitv2.py | 3 +- tests/models/mpnet/test_tokenization_mpnet.py | 4 +- tests/models/mra/test_modeling_mra.py | 3 +- .../models/musicgen/test_modeling_musicgen.py | 3 +- .../test_modeling_musicgen_melody.py | 3 +- tests/models/mvp/test_modeling_mvp.py | 3 +- tests/models/nat/test_modeling_nat.py | 2 +- .../models/nllb_moe/test_modeling_nllb_moe.py | 3 +- .../test_modeling_nystromformer.py | 3 +- tests/models/olmo/test_modeling_olmo.py | 2 +- .../oneformer/test_modeling_oneformer.py | 2 +- tests/models/opt/test_modeling_opt.py | 3 +- tests/models/owlv2/test_modeling_owlv2.py | 3 +- tests/models/owlvit/test_modeling_owlvit.py | 3 +- .../paligemma/test_modeling_paligemma.py | 2 +- .../test_modeling_patchtsmixer.py | 2 +- .../models/patchtst/test_modeling_patchtst.py | 2 +- tests/models/pegasus/test_modeling_pegasus.py | 2 +- .../pegasus_x/test_modeling_pegasus_x.py | 3 +- .../perceiver/test_modeling_perceiver.py | 2 +- .../perceiver/test_tokenization_perceiver.py | 4 +- .../persimmon/test_modeling_persimmon.py | 3 +- tests/models/phi/test_modeling_phi.py | 3 +- tests/models/phi3/test_modeling_phi3.py | 3 +- .../pix2struct/test_modeling_pix2struct.py | 2 +- tests/models/plbart/test_modeling_plbart.py | 3 +- .../poolformer/test_modeling_poolformer.py | 3 +- .../pop2piano/test_modeling_pop2piano.py | 2 +- .../test_tokenization_prophetnet.py | 16 +-- tests/models/pvt/test_modeling_pvt.py | 3 +- tests/models/qdqbert/test_modeling_qdqbert.py | 3 +- tests/models/qwen2/test_modeling_qwen2.py | 3 +- .../qwen2_moe/test_modeling_qwen2_moe.py | 3 +- tests/models/realm/test_modeling_realm.py | 2 +- tests/models/realm/test_tokenization_realm.py | 20 +-- .../test_modeling_recurrent_gemma.py | 3 +- tests/models/regnet/test_modeling_regnet.py | 3 +- .../models/regnet/test_modeling_tf_regnet.py | 2 +- tests/models/rembert/test_modeling_rembert.py | 3 +- .../rembert/test_tokenization_rembert.py | 3 +- tests/models/resnet/test_modeling_resnet.py | 3 +- .../models/resnet/test_modeling_tf_resnet.py | 3 +- .../models/roc_bert/test_modeling_roc_bert.py | 2 +- .../roc_bert/test_tokenization_roc_bert.py | 12 +- .../models/roformer/test_modeling_roformer.py | 3 +- tests/models/sam/test_modeling_sam.py | 3 +- tests/models/sam/test_modeling_tf_sam.py | 3 +- .../test_modeling_seamless_m4t.py | 3 +- 
.../test_modeling_seamless_m4t_v2.py | 3 +- .../segformer/test_modeling_segformer.py | 3 +- .../segformer/test_modeling_tf_segformer.py | 2 +- tests/models/seggpt/test_modeling_seggpt.py | 3 +- tests/models/sew/test_modeling_sew.py | 3 +- tests/models/sew_d/test_modeling_sew_d.py | 3 +- tests/models/siglip/test_modeling_siglip.py | 3 +- .../test_modeling_speech_to_text.py | 2 +- .../test_modeling_tf_speech_to_text.py | 2 +- .../test_modeling_speech_to_text_2.py | 2 +- .../models/speecht5/test_modeling_speecht5.py | 2 +- .../models/splinter/test_modeling_splinter.py | 2 +- .../models/stablelm/test_modeling_stablelm.py | 3 +- .../starcoder2/test_modeling_starcoder2.py | 3 +- .../swiftformer/test_modeling_swiftformer.py | 3 +- .../test_modeling_tf_swiftformer.py | 3 +- tests/models/swin/test_modeling_swin.py | 2 +- tests/models/swin/test_modeling_tf_swin.py | 3 +- tests/models/swin2sr/test_modeling_swin2sr.py | 3 +- tests/models/swinv2/test_modeling_swinv2.py | 3 +- .../test_modeling_table_transformer.py | 3 +- tests/models/tapas/test_tokenization_tapas.py | 20 +-- .../test_modeling_time_series_transformer.py | 2 +- .../timesformer/test_modeling_timesformer.py | 3 +- tests/models/trocr/test_modeling_trocr.py | 2 +- .../tvlt/test_feature_extraction_tvlt.py | 2 +- .../models/tvlt/test_image_processor_tvlt.py | 2 +- tests/models/tvlt/test_modeling_tvlt.py | 2 +- tests/models/tvp/test_modeling_tvp.py | 3 +- tests/models/udop/test_tokenization_udop.py | 2 +- .../unispeech/test_modeling_unispeech.py | 2 +- .../test_modeling_unispeech_sat.py | 2 +- tests/models/upernet/test_modeling_upernet.py | 3 +- .../video_llava/test_modeling_video_llava.py | 2 +- .../models/videomae/test_modeling_videomae.py | 3 +- tests/models/vilt/test_modeling_vilt.py | 2 +- .../models/vipllava/test_modeling_vipllava.py | 2 +- ...test_modeling_tf_vision_encoder_decoder.py | 3 +- ..._modeling_flax_vision_text_dual_encoder.py | 3 +- ...st_modeling_tf_vision_text_dual_encoder.py | 3 +- .../test_modeling_vision_text_dual_encoder.py | 3 +- .../visual_bert/test_modeling_visual_bert.py | 2 +- tests/models/vit/test_modeling_tf_vit.py | 3 +- tests/models/vit/test_modeling_vit.py | 3 +- .../vit_hybrid/test_modeling_vit_hybrid.py | 3 +- .../vit_mae/test_modeling_tf_vit_mae.py | 3 +- tests/models/vit_mae/test_modeling_vit_mae.py | 3 +- tests/models/vit_msn/test_modeling_vit_msn.py | 3 +- tests/models/vitdet/test_modeling_vitdet.py | 3 +- .../models/vitmatte/test_modeling_vitmatte.py | 3 +- tests/models/vits/test_modeling_vits.py | 2 +- tests/models/vits/test_tokenization_vits.py | 3 +- tests/models/vivit/test_modeling_vivit.py | 3 +- .../models/wav2vec2/test_modeling_wav2vec2.py | 2 +- .../test_modeling_wav2vec2_bert.py | 3 +- .../test_modeling_wav2vec2_conformer.py | 3 +- .../test_tokenization_wav2vec2_phoneme.py | 1 + tests/models/wavlm/test_modeling_wavlm.py | 2 +- .../whisper/test_modeling_tf_whisper.py | 2 +- tests/models/whisper/test_modeling_whisper.py | 2 +- tests/models/x_clip/test_modeling_x_clip.py | 3 +- tests/models/xlnet/test_tokenization_xlnet.py | 2 +- tests/models/yolos/test_modeling_yolos.py | 3 +- tests/models/yoso/test_modeling_yoso.py | 3 +- .../pytorch/run_glue_model_parallelism.py | 2 +- tests/test_tokenization_common.py | 6 +- tests/tokenization/test_tokenization_utils.py | 1 + utils/add_pipeline_model_mapping_to_test.py | 1 - utils/check_copies.py | 4 +- utils/check_doc_toc.py | 1 - utils/check_docstrings.py | 1 + utils/check_doctest_list.py | 1 + utils/check_dummies.py | 1 + utils/check_repo.py | 1 + 
utils/check_support_list.py | 1 + utils/check_table.py | 1 + utils/custom_init_isort.py | 1 + utils/download_glue_data.py | 2 +- utils/models_to_deprecate.py | 1 + utils/release.py | 1 + utils/sort_auto_mappings.py | 1 + utils/update_metadata.py | 1 + utils/update_tiny_models.py | 1 - 1172 files changed, 1555 insertions(+), 1861 deletions(-) diff --git a/Makefile b/Makefile index 6c1541a113..f9b2a8c9a7 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ export PYTHONPATH = src check_dirs := examples tests src utils -exclude_folders := examples/research_projects +exclude_folders := "" modified_only_fixup: $(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs))) diff --git a/examples/flax/language-modeling/run_mlm_flax.py b/examples/flax/language-modeling/run_mlm_flax.py index d793cc3a37..a13c62e0fd 100755 --- a/examples/flax/language-modeling/run_mlm_flax.py +++ b/examples/flax/language-modeling/run_mlm_flax.py @@ -20,6 +20,7 @@ text file or a dataset. Here is the full list of checkpoints on the hub that can be fine-tuned by this script: https://huggingface.co/models?filter=fill-mask """ + import json import logging import math diff --git a/examples/flax/language-modeling/run_t5_mlm_flax.py b/examples/flax/language-modeling/run_t5_mlm_flax.py index caa70fe514..c4b47711d9 100755 --- a/examples/flax/language-modeling/run_t5_mlm_flax.py +++ b/examples/flax/language-modeling/run_t5_mlm_flax.py @@ -19,6 +19,7 @@ Pretraining the library models for T5-like span-masked language modeling on a te Here is the full list of checkpoints on the hub that can be pretrained by this script: https://huggingface.co/models?filter=t5 """ + import json import logging import math diff --git a/examples/flax/question-answering/utils_qa.py b/examples/flax/question-answering/utils_qa.py index 23a46370d1..79497dbb81 100644 --- a/examples/flax/question-answering/utils_qa.py +++ b/examples/flax/question-answering/utils_qa.py @@ -15,6 +15,7 @@ """ Post-processing utilities for question answering. """ + import collections import json import logging diff --git a/examples/flax/text-classification/run_flax_glue.py b/examples/flax/text-classification/run_flax_glue.py index f1f6520201..d1234db015 100755 --- a/examples/flax/text-classification/run_flax_glue.py +++ b/examples/flax/text-classification/run_flax_glue.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning a 🤗 Flax Transformers model for sequence classification on GLUE.""" +"""Finetuning a 🤗 Flax Transformers model for sequence classification on GLUE.""" + import json import logging import math diff --git a/examples/flax/token-classification/run_flax_ner.py b/examples/flax/token-classification/run_flax_ner.py index 6ce06c8218..ecb52ceb08 100644 --- a/examples/flax/token-classification/run_flax_ner.py +++ b/examples/flax/token-classification/run_flax_ner.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Fine-tuning a 🤗 Flax Transformers model on token classification tasks (NER, POS, CHUNKS)""" +"""Fine-tuning a 🤗 Flax Transformers model on token classification tasks (NER, POS, CHUNKS)""" + import json import logging import math diff --git a/examples/legacy/benchmarking/plot_csv_file.py b/examples/legacy/benchmarking/plot_csv_file.py index 9a9ad9c670..aa092f5c04 100644 --- a/examples/legacy/benchmarking/plot_csv_file.py +++ b/examples/legacy/benchmarking/plot_csv_file.py @@ -93,14 +93,14 @@ class Plot: self.result_dict[model_name]["seq_len"].append(int(row["sequence_length"])) if can_convert_to_int(row["result"]): # value is not None - self.result_dict[model_name]["result"][ - (int(row["batch_size"]), int(row["sequence_length"])) - ] = int(row["result"]) + self.result_dict[model_name]["result"][(int(row["batch_size"]), int(row["sequence_length"]))] = ( + int(row["result"]) + ) elif can_convert_to_float(row["result"]): # value is not None - self.result_dict[model_name]["result"][ - (int(row["batch_size"]), int(row["sequence_length"])) - ] = float(row["result"]) + self.result_dict[model_name]["result"][(int(row["batch_size"]), int(row["sequence_length"]))] = ( + float(row["result"]) + ) def plot(self): fig, ax = plt.subplots() diff --git a/examples/legacy/benchmarking/run_benchmark.py b/examples/legacy/benchmarking/run_benchmark.py index e2e7d4c5ea..85f266566b 100644 --- a/examples/legacy/benchmarking/run_benchmark.py +++ b/examples/legacy/benchmarking/run_benchmark.py @@ -14,7 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Benchmarking the library on inference and training """ +"""Benchmarking the library on inference and training""" from transformers import HfArgumentParser, PyTorchBenchmark, PyTorchBenchmarkArguments diff --git a/examples/legacy/multiple_choice/run_multiple_choice.py b/examples/legacy/multiple_choice/run_multiple_choice.py index 4513970425..fece480cad 100644 --- a/examples/legacy/multiple_choice/run_multiple_choice.py +++ b/examples/legacy/multiple_choice/run_multiple_choice.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning the library models for multiple choice (Bert, Roberta, XLNet).""" - +"""Finetuning the library models for multiple choice (Bert, Roberta, XLNet).""" import logging import os diff --git a/examples/legacy/multiple_choice/utils_multiple_choice.py b/examples/legacy/multiple_choice/utils_multiple_choice.py index e3bbc72884..6b7559c49e 100644 --- a/examples/legacy/multiple_choice/utils_multiple_choice.py +++ b/examples/legacy/multiple_choice/utils_multiple_choice.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Multiple choice fine-tuning: utilities to work with multiple choice tasks of reading comprehension """ - +"""Multiple choice fine-tuning: utilities to work with multiple choice tasks of reading comprehension""" import csv import glob diff --git a/examples/legacy/question-answering/run_squad.py b/examples/legacy/question-answering/run_squad.py index 999752485b..f5a827c15a 100644 --- a/examples/legacy/question-answering/run_squad.py +++ b/examples/legacy/question-answering/run_squad.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning the library models for question-answering on SQuAD (DistilBERT, Bert, XLM, XLNet).""" - +"""Finetuning the library models for question-answering on SQuAD (DistilBERT, Bert, XLM, XLNet).""" import argparse import glob diff --git a/examples/legacy/question-answering/run_squad_trainer.py b/examples/legacy/question-answering/run_squad_trainer.py index 7e3a6f28e0..a27cb76295 100644 --- a/examples/legacy/question-answering/run_squad_trainer.py +++ b/examples/legacy/question-answering/run_squad_trainer.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Fine-tuning the library models for question-answering.""" - +"""Fine-tuning the library models for question-answering.""" import logging import os diff --git a/examples/legacy/run_language_modeling.py b/examples/legacy/run_language_modeling.py index b157658656..317b2dfb48 100755 --- a/examples/legacy/run_language_modeling.py +++ b/examples/legacy/run_language_modeling.py @@ -20,7 +20,6 @@ GPT, GPT-2 and CTRL are fine-tuned using a causal language modeling (CLM) loss. using a masked language modeling (MLM) loss. XLNet is fine-tuned using a permutation language modeling (PLM) loss. """ - import logging import math import os diff --git a/examples/legacy/run_openai_gpt.py b/examples/legacy/run_openai_gpt.py index d0c21aba27..3831c1bd44 100755 --- a/examples/legacy/run_openai_gpt.py +++ b/examples/legacy/run_openai_gpt.py @@ -28,6 +28,7 @@ --output_dir ../log \ --train_batch_size 16 \ """ + import argparse import csv import logging diff --git a/examples/legacy/run_swag.py b/examples/legacy/run_swag.py index 66d77a1742..dbf712a71f 100755 --- a/examples/legacy/run_swag.py +++ b/examples/legacy/run_swag.py @@ -15,10 +15,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """BERT finetuning runner. - Finetuning the library models for multiple choice on SWAG (Bert). +Finetuning the library models for multiple choice on SWAG (Bert). """ - import argparse import csv import glob diff --git a/examples/legacy/run_transfo_xl.py b/examples/legacy/run_transfo_xl.py index 1c48974f39..ce24fe13d7 100755 --- a/examples/legacy/run_transfo_xl.py +++ b/examples/legacy/run_transfo_xl.py @@ -14,14 +14,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Transformer XL model evaluation script. - Adapted from https://github.com/kimiyoung/transformer-xl. - In particular https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/eval.py +"""PyTorch Transformer XL model evaluation script. 
+Adapted from https://github.com/kimiyoung/transformer-xl. +In particular https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/eval.py - This script with default values evaluates a pretrained Transformer-XL on WikiText 103 +This script with default values evaluates a pretrained Transformer-XL on WikiText 103 """ - import argparse import logging import math diff --git a/examples/legacy/seq2seq/xla_spawn.py b/examples/legacy/seq2seq/xla_spawn.py index 5df6bfa2d5..f9955acfa2 100644 --- a/examples/legacy/seq2seq/xla_spawn.py +++ b/examples/legacy/seq2seq/xla_spawn.py @@ -23,7 +23,6 @@ Inspired by https://github.com/pytorch/pytorch/blob/master/torch/distributed/lau """ - import importlib import sys from argparse import REMAINDER, ArgumentParser diff --git a/examples/legacy/token-classification/run_ner.py b/examples/legacy/token-classification/run_ner.py index c571d44a12..4fb74e78ff 100644 --- a/examples/legacy/token-classification/run_ner.py +++ b/examples/legacy/token-classification/run_ner.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Fine-tuning the library models for named entity recognition on CoNLL-2003. """ +"""Fine-tuning the library models for named entity recognition on CoNLL-2003.""" + import logging import os import sys diff --git a/examples/legacy/token-classification/utils_ner.py b/examples/legacy/token-classification/utils_ner.py index e7e3a157e3..da4d8c3b60 100644 --- a/examples/legacy/token-classification/utils_ner.py +++ b/examples/legacy/token-classification/utils_ner.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Named entity recognition fine-tuning: utilities to work with CoNLL-2003 task. """ - +"""Named entity recognition fine-tuning: utilities to work with CoNLL-2003 task.""" import logging import os diff --git a/examples/pytorch/image-classification/run_image_classification_no_trainer.py b/examples/pytorch/image-classification/run_image_classification_no_trainer.py index 03d75ee3a2..b8f69b4b6f 100644 --- a/examples/pytorch/image-classification/run_image_classification_no_trainer.py +++ b/examples/pytorch/image-classification/run_image_classification_no_trainer.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning any 🤗 Transformers model for image classification leveraging 🤗 Accelerate.""" +"""Finetuning any 🤗 Transformers model for image classification leveraging 🤗 Accelerate.""" + import argparse import json import logging diff --git a/examples/pytorch/object-detection/run_object_detection_no_trainer.py b/examples/pytorch/object-detection/run_object_detection_no_trainer.py index af66ff2a84..f0889f3d28 100644 --- a/examples/pytorch/object-detection/run_object_detection_no_trainer.py +++ b/examples/pytorch/object-detection/run_object_detection_no_trainer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Finetuning 🤗 Transformers model for object detection with Accelerate.""" +"""Finetuning 🤗 Transformers model for object detection with Accelerate.""" import argparse import json diff --git a/examples/pytorch/question-answering/trainer_qa.py b/examples/pytorch/question-answering/trainer_qa.py index 0e82e6b816..10428a2b77 100644 --- a/examples/pytorch/question-answering/trainer_qa.py +++ b/examples/pytorch/question-answering/trainer_qa.py @@ -15,6 +15,7 @@ """ A subclass of `Trainer` specific to Question-Answering tasks """ + import math import time diff --git a/examples/pytorch/question-answering/trainer_seq2seq_qa.py b/examples/pytorch/question-answering/trainer_seq2seq_qa.py index dea184e908..ff9e2d7bc3 100644 --- a/examples/pytorch/question-answering/trainer_seq2seq_qa.py +++ b/examples/pytorch/question-answering/trainer_seq2seq_qa.py @@ -15,6 +15,7 @@ """ A subclass of `Trainer` specific to Question-Answering tasks """ + import math import time from typing import Dict, List, Optional diff --git a/examples/pytorch/question-answering/utils_qa.py b/examples/pytorch/question-answering/utils_qa.py index 23a46370d1..79497dbb81 100644 --- a/examples/pytorch/question-answering/utils_qa.py +++ b/examples/pytorch/question-answering/utils_qa.py @@ -15,6 +15,7 @@ """ Post-processing utilities for question answering. """ + import collections import json import logging diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py index b606740535..6521657e3c 100644 --- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py +++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning any 🤗 Transformers model supported by AutoModelForSemanticSegmentation for semantic segmentation.""" +"""Finetuning any 🤗 Transformers model supported by AutoModelForSemanticSegmentation for semantic segmentation.""" import argparse import json diff --git a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py index 9ec37fbfd1..9592a1f6e4 100755 --- a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py +++ b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -""" Pre-Training a 🤗 Wav2Vec2 model on unlabeled audio data """ +"""Pre-Training a 🤗 Wav2Vec2 model on unlabeled audio data""" import argparse import math diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py index 1f02ad145b..6a05f342b3 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-""" Fine-tuning a 🤗 Transformers CTC model for automatic speech recognition""" +"""Fine-tuning a 🤗 Transformers CTC model for automatic speech recognition""" import functools import json diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py index 2f8f0d6e2b..bf3241c61d 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" Fine-tuning a 🤗 Transformers CTC adapter model for automatic speech recognition""" +"""Fine-tuning a 🤗 Transformers CTC adapter model for automatic speech recognition""" import functools import json diff --git a/examples/pytorch/text-classification/run_classification.py b/examples/pytorch/text-classification/run_classification.py index 16a613e025..b5da2063b6 100755 --- a/examples/pytorch/text-classification/run_classification.py +++ b/examples/pytorch/text-classification/run_classification.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning the library models for text classification.""" +"""Finetuning the library models for text classification.""" # You can also adapt this script on your own text classification task. Pointers for this are left as comments. import logging diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py index 751b208e9a..c9d4ec8b10 100755 --- a/examples/pytorch/text-classification/run_glue.py +++ b/examples/pytorch/text-classification/run_glue.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning the library models for sequence classification on GLUE.""" +"""Finetuning the library models for sequence classification on GLUE.""" # You can also adapt this script on your own text classification task. Pointers for this are left as comments. import logging diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py index 338c28b184..6a8123f076 100644 --- a/examples/pytorch/text-classification/run_glue_no_trainer.py +++ b/examples/pytorch/text-classification/run_glue_no_trainer.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning a 🤗 Transformers model for sequence classification on GLUE.""" +"""Finetuning a 🤗 Transformers model for sequence classification on GLUE.""" + import argparse import json import logging diff --git a/examples/pytorch/text-classification/run_xnli.py b/examples/pytorch/text-classification/run_xnli.py index b38cd80aeb..127f06e0f6 100755 --- a/examples/pytorch/text-classification/run_xnli.py +++ b/examples/pytorch/text-classification/run_xnli.py @@ -14,8 +14,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning multi-lingual models on XNLI (e.g. 
Bert, DistilBERT, XLM). - Adapted from `examples/text-classification/run_glue.py`""" +"""Finetuning multi-lingual models on XNLI (e.g. Bert, DistilBERT, XLM). +Adapted from `examples/text-classification/run_glue.py`""" import logging import os diff --git a/examples/pytorch/text-generation/run_generation.py b/examples/pytorch/text-generation/run_generation.py index 557b75572c..0e21a24268 100755 --- a/examples/pytorch/text-generation/run_generation.py +++ b/examples/pytorch/text-generation/run_generation.py @@ -14,9 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Conditional text generation with the auto-regressive models of the library (GPT/GPT-2/CTRL/Transformer-XL/XLNet) -""" - +"""Conditional text generation with the auto-regressive models of the library (GPT/GPT-2/CTRL/Transformer-XL/XLNet)""" import argparse import inspect diff --git a/examples/pytorch/text-generation/run_generation_contrastive_search.py b/examples/pytorch/text-generation/run_generation_contrastive_search.py index a48529fb30..ba4c9a77e9 100755 --- a/examples/pytorch/text-generation/run_generation_contrastive_search.py +++ b/examples/pytorch/text-generation/run_generation_contrastive_search.py @@ -13,13 +13,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" The examples of running contrastive search on the auto-APIs; +"""The examples of running contrastive search on the auto-APIs; Running this example: python run_generation_contrastive_search.py --model_name_or_path=openai-community/gpt2-large --penalty_alpha=0.6 --k=4 --length=256 """ - import argparse import logging diff --git a/examples/pytorch/xla_spawn.py b/examples/pytorch/xla_spawn.py index 5df6bfa2d5..f9955acfa2 100644 --- a/examples/pytorch/xla_spawn.py +++ b/examples/pytorch/xla_spawn.py @@ -23,7 +23,6 @@ Inspired by https://github.com/pytorch/pytorch/blob/master/torch/distributed/lau """ - import importlib import sys from argparse import REMAINDER, ArgumentParser diff --git a/examples/research_projects/adversarial/run_hans.py b/examples/research_projects/adversarial/run_hans.py index 3affbb7a69..23625dfa7e 100644 --- a/examples/research_projects/adversarial/run_hans.py +++ b/examples/research_projects/adversarial/run_hans.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning the library models for sequence classification on HANS.""" +"""Finetuning the library models for sequence classification on HANS.""" import logging import os diff --git a/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_albert.py b/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_albert.py index 6881bf8d18..5b30155a73 100644 --- a/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_albert.py +++ b/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_albert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""PyTorch ALBERT model with Patience-based Early Exit. 
""" +"""PyTorch ALBERT model with Patience-based Early Exit.""" import logging diff --git a/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_bert.py b/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_bert.py index dfa78585a6..c1ce924a57 100644 --- a/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_bert.py +++ b/examples/research_projects/bert-loses-patience/pabee/modeling_pabee_bert.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""PyTorch BERT model with Patience-based Early Exit. """ - +"""PyTorch BERT model with Patience-based Early Exit.""" import logging diff --git a/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py b/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py index 847148d557..d1ee5ddde3 100755 --- a/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py +++ b/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Training and inference using the library models for sequence classification on GLUE (Bert, Albert) with PABEE.""" - +"""Training and inference using the library models for sequence classification on GLUE (Bert, Albert) with PABEE.""" import argparse import glob diff --git a/examples/research_projects/bertabs/configuration_bertabs.py b/examples/research_projects/bertabs/configuration_bertabs.py index 02b8f27cb3..4c65cd3395 100644 --- a/examples/research_projects/bertabs/configuration_bertabs.py +++ b/examples/research_projects/bertabs/configuration_bertabs.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" BertAbs configuration """ +"""BertAbs configuration""" + import logging from transformers import PretrainedConfig diff --git a/examples/research_projects/bertabs/convert_bertabs_original_pytorch_checkpoint.py b/examples/research_projects/bertabs/convert_bertabs_original_pytorch_checkpoint.py index b6f5d17751..338ffd21c9 100644 --- a/examples/research_projects/bertabs/convert_bertabs_original_pytorch_checkpoint.py +++ b/examples/research_projects/bertabs/convert_bertabs_original_pytorch_checkpoint.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Convert BertExtAbs's checkpoints. +"""Convert BertExtAbs's checkpoints. The script looks like it is doing something trivial but it is not. The "weights" proposed by the authors are actually the entire model pickled. We need to load diff --git a/examples/research_projects/bertology/run_bertology.py b/examples/research_projects/bertology/run_bertology.py index 4cb046066c..35d096f164 100644 --- a/examples/research_projects/bertology/run_bertology.py +++ b/examples/research_projects/bertology/run_bertology.py @@ -12,13 +12,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Bertology: this script shows how you can explore the internals of the models in the library to: - - compute the entropy of the head attentions - - compute the importance of each head - - prune (remove) the low importance head. - Some parts of this script are adapted from the code of Michel et al. (http://arxiv.org/abs/1905.10650) - which is available at https://github.com/pmichel31415/are-16-heads-really-better-than-1 +"""Bertology: this script shows how you can explore the internals of the models in the library to: +- compute the entropy of the head attentions +- compute the importance of each head +- prune (remove) the low importance head. +Some parts of this script are adapted from the code of Michel et al. (http://arxiv.org/abs/1905.10650) +which is available at https://github.com/pmichel31415/are-16-heads-really-better-than-1 """ + import argparse import logging import os diff --git a/examples/research_projects/bertology/run_prune_gpt.py b/examples/research_projects/bertology/run_prune_gpt.py index fa7484a787..d227634c2b 100644 --- a/examples/research_projects/bertology/run_prune_gpt.py +++ b/examples/research_projects/bertology/run_prune_gpt.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -""" This script is adapted from the Bertology pruning code (https://github.com/huggingface/transformers/blob/783d7d2629e97c5f0c5f9ef01b8c66410275c204/examples/research_projects/bertology/run_bertology.py) +"""This script is adapted from the Bertology pruning code (https://github.com/huggingface/transformers/blob/783d7d2629e97c5f0c5f9ef01b8c66410275c204/examples/research_projects/bertology/run_bertology.py) to prune GPT-like models. The author is @altsoph. """ diff --git a/examples/research_projects/distillation/distiller.py b/examples/research_projects/distillation/distiller.py index 3ef2ba87b2..963af976f5 100644 --- a/examples/research_projects/distillation/distiller.py +++ b/examples/research_projects/distillation/distiller.py @@ -12,9 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" The distiller to distil the student. - Adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) +"""The distiller to distil the student. +Adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) """ + import math import os import time diff --git a/examples/research_projects/distillation/grouped_batch_sampler.py b/examples/research_projects/distillation/grouped_batch_sampler.py index a068f7e09e..fd126b13b5 100644 --- a/examples/research_projects/distillation/grouped_batch_sampler.py +++ b/examples/research_projects/distillation/grouped_batch_sampler.py @@ -12,8 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Adapted from PyTorch Vision (https://github.com/pytorch/vision/blob/master/references/detection/group_by_aspect_ratio.py) -""" +"""Adapted from PyTorch Vision (https://github.com/pytorch/vision/blob/master/references/detection/group_by_aspect_ratio.py)""" + import bisect import copy from collections import defaultdict diff --git a/examples/research_projects/distillation/lm_seqs_dataset.py b/examples/research_projects/distillation/lm_seqs_dataset.py index 8e0a5814ab..647c8f464f 100644 --- a/examples/research_projects/distillation/lm_seqs_dataset.py +++ b/examples/research_projects/distillation/lm_seqs_dataset.py @@ -12,9 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Dataset to distilled models - adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) +"""Dataset to distilled models +adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) """ + import numpy as np import torch from torch.utils.data import Dataset diff --git a/examples/research_projects/distillation/run_squad_w_distillation.py b/examples/research_projects/distillation/run_squad_w_distillation.py index 523d9bedb8..a1150f6b43 100644 --- a/examples/research_projects/distillation/run_squad_w_distillation.py +++ b/examples/research_projects/distillation/run_squad_w_distillation.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" This is the exact same script as `examples/question-answering/run_squad.py` (as of 2020, January 8th) with an additional and optional step of distillation.""" +"""This is the exact same script as `examples/question-answering/run_squad.py` (as of 2020, January 8th) with an additional and optional step of distillation.""" import argparse import glob diff --git a/examples/research_projects/distillation/scripts/binarized_data.py b/examples/research_projects/distillation/scripts/binarized_data.py index 951530d5c7..3fc3214acf 100644 --- a/examples/research_projects/distillation/scripts/binarized_data.py +++ b/examples/research_projects/distillation/scripts/binarized_data.py @@ -15,6 +15,7 @@ """ Preprocessing script before distillation. """ + import argparse import logging import pickle diff --git a/examples/research_projects/distillation/scripts/extract.py b/examples/research_projects/distillation/scripts/extract.py index f60f243dec..c45821d187 100644 --- a/examples/research_projects/distillation/scripts/extract.py +++ b/examples/research_projects/distillation/scripts/extract.py @@ -16,6 +16,7 @@ Preprocessing script before training the distilled model. Specific to RoBERTa -> DistilRoBERTa and GPT2 -> DistilGPT2. """ + import argparse import torch diff --git a/examples/research_projects/distillation/scripts/extract_distilbert.py b/examples/research_projects/distillation/scripts/extract_distilbert.py index a58105f999..8637970c51 100644 --- a/examples/research_projects/distillation/scripts/extract_distilbert.py +++ b/examples/research_projects/distillation/scripts/extract_distilbert.py @@ -16,6 +16,7 @@ Preprocessing script before training DistilBERT. Specific to BERT -> DistilBERT. 
""" + import argparse import torch diff --git a/examples/research_projects/distillation/scripts/token_counts.py b/examples/research_projects/distillation/scripts/token_counts.py index 736b564ee7..2f80bf31f4 100644 --- a/examples/research_projects/distillation/scripts/token_counts.py +++ b/examples/research_projects/distillation/scripts/token_counts.py @@ -15,6 +15,7 @@ """ Preprocessing script before training the distilled model. """ + import argparse import logging import pickle diff --git a/examples/research_projects/distillation/train.py b/examples/research_projects/distillation/train.py index 1acb527220..15d98ace09 100644 --- a/examples/research_projects/distillation/train.py +++ b/examples/research_projects/distillation/train.py @@ -16,6 +16,7 @@ Training the distilled model. Supported architectures include: BERT -> DistilBERT, RoBERTa -> DistilRoBERTa, GPT2 -> DistilGPT2. """ + import argparse import json import os diff --git a/examples/research_projects/distillation/utils.py b/examples/research_projects/distillation/utils.py index 6d439453fe..e86d2593bb 100644 --- a/examples/research_projects/distillation/utils.py +++ b/examples/research_projects/distillation/utils.py @@ -12,9 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Utils to train DistilBERT - adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) +"""Utils to train DistilBERT +adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) """ + import json import logging import os diff --git a/examples/research_projects/jax-projects/dataset-streaming/run_mlm_flax_stream.py b/examples/research_projects/jax-projects/dataset-streaming/run_mlm_flax_stream.py index fbb165ba42..8940fab5bd 100755 --- a/examples/research_projects/jax-projects/dataset-streaming/run_mlm_flax_stream.py +++ b/examples/research_projects/jax-projects/dataset-streaming/run_mlm_flax_stream.py @@ -20,6 +20,7 @@ text file or a dataset. Here is the full list of checkpoints on the hub that can be fine-tuned by this script: https://huggingface.co/models?filter=fill-mask """ + import logging import os import sys diff --git a/examples/research_projects/lxmert/modeling_frcnn.py b/examples/research_projects/lxmert/modeling_frcnn.py index 499de53207..8aea9b5e1a 100644 --- a/examples/research_projects/lxmert/modeling_frcnn.py +++ b/examples/research_projects/lxmert/modeling_frcnn.py @@ -1,20 +1,21 @@ """ - coding=utf-8 - Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal - Adapted From Facebook Inc, Detectron2 && Huggingface Co. +coding=utf-8 +Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal +Adapted From Facebook Inc, Detectron2 && Huggingface Co. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License.import copy +""" - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License.import copy - """ import itertools import math import os diff --git a/examples/research_projects/lxmert/processing_image.py b/examples/research_projects/lxmert/processing_image.py index 4343cfdbce..65f8f6cd37 100644 --- a/examples/research_projects/lxmert/processing_image.py +++ b/examples/research_projects/lxmert/processing_image.py @@ -1,20 +1,21 @@ """ - coding=utf-8 - Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal - Adapted From Facebook Inc, Detectron2 +coding=utf-8 +Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal +Adapted From Facebook Inc, Detectron2 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.import copy +""" - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License.import copy - """ import sys from typing import Tuple diff --git a/examples/research_projects/lxmert/utils.py b/examples/research_projects/lxmert/utils.py index c75f523a08..995fbd2c19 100644 --- a/examples/research_projects/lxmert/utils.py +++ b/examples/research_projects/lxmert/utils.py @@ -1,20 +1,20 @@ """ - coding=utf-8 - Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal, Huggingface team :) - Adapted From Facebook Inc, Detectron2 +coding=utf-8 +Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal, Huggingface team :) +Adapted From Facebook Inc, Detectron2 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License.import copy - """ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.import copy +""" import copy import fnmatch diff --git a/examples/research_projects/lxmert/visualizing_image.py b/examples/research_projects/lxmert/visualizing_image.py index 163d661e87..dcfd8426ff 100644 --- a/examples/research_projects/lxmert/visualizing_image.py +++ b/examples/research_projects/lxmert/visualizing_image.py @@ -1,20 +1,21 @@ """ - coding=utf-8 - Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal - Adapted From Facebook Inc, Detectron2 +coding=utf-8 +Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal +Adapted From Facebook Inc, Detectron2 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.import copy +""" - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License.import copy - """ import colorsys import io diff --git a/examples/research_projects/mm-imdb/run_mmimdb.py b/examples/research_projects/mm-imdb/run_mmimdb.py index c863857c41..686691e0b9 100644 --- a/examples/research_projects/mm-imdb/run_mmimdb.py +++ b/examples/research_projects/mm-imdb/run_mmimdb.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning the library models for multimodal multiclass prediction on MM-IMDB dataset.""" - +"""Finetuning the library models for multimodal multiclass prediction on MM-IMDB dataset.""" import argparse import glob diff --git a/examples/research_projects/movement-pruning/counts_parameters.py b/examples/research_projects/movement-pruning/counts_parameters.py index 89ce40baa7..c0ac53fb78 100644 --- a/examples/research_projects/movement-pruning/counts_parameters.py +++ b/examples/research_projects/movement-pruning/counts_parameters.py @@ -15,6 +15,7 @@ Count remaining (non-zero) weights in the encoder (i.e. the transformer layers). Sparsity and remaining weights levels are equivalent: sparsity % = 100 - remaining weights %. 
""" + import argparse import os diff --git a/examples/research_projects/movement-pruning/emmental/configuration_bert_masked.py b/examples/research_projects/movement-pruning/emmental/configuration_bert_masked.py index 2a3bd763a2..9c7459f27a 100644 --- a/examples/research_projects/movement-pruning/emmental/configuration_bert_masked.py +++ b/examples/research_projects/movement-pruning/emmental/configuration_bert_masked.py @@ -13,10 +13,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Masked BERT model configuration. It replicates the class `~transformers.BertConfig` +"""Masked BERT model configuration. It replicates the class `~transformers.BertConfig` and adapts it to the specificities of MaskedBert (`pruning_method`, `mask_init` and `mask_scale`.""" - import logging from transformers.configuration_utils import PretrainedConfig diff --git a/examples/research_projects/movement-pruning/emmental/modeling_bert_masked.py b/examples/research_projects/movement-pruning/emmental/modeling_bert_masked.py index f47395bb00..8c0b091c7d 100644 --- a/examples/research_projects/movement-pruning/emmental/modeling_bert_masked.py +++ b/examples/research_projects/movement-pruning/emmental/modeling_bert_masked.py @@ -18,7 +18,6 @@ compute the adaptive mask. Built on top of `transformers.models.bert.modeling_bert`""" - import logging import math diff --git a/examples/research_projects/movement-pruning/masked_run_glue.py b/examples/research_projects/movement-pruning/masked_run_glue.py index e2090c431e..f7103deca1 100644 --- a/examples/research_projects/movement-pruning/masked_run_glue.py +++ b/examples/research_projects/movement-pruning/masked_run_glue.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Fine-pruning Masked BERT on sequence classification on GLUE.""" +"""Fine-pruning Masked BERT on sequence classification on GLUE.""" import argparse import glob diff --git a/examples/research_projects/movement-pruning/masked_run_squad.py b/examples/research_projects/movement-pruning/masked_run_squad.py index 14d92dde4e..d7b4b19112 100644 --- a/examples/research_projects/movement-pruning/masked_run_squad.py +++ b/examples/research_projects/movement-pruning/masked_run_squad.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Fine-pruning Masked BERT for question-answering on SQuAD.""" - +"""Fine-pruning Masked BERT for question-answering on SQuAD.""" import argparse import glob diff --git a/examples/research_projects/onnx/summarization/run_onnx_exporter.py b/examples/research_projects/onnx/summarization/run_onnx_exporter.py index 889eefb4e7..fa82673270 100644 --- a/examples/research_projects/onnx/summarization/run_onnx_exporter.py +++ b/examples/research_projects/onnx/summarization/run_onnx_exporter.py @@ -13,9 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" +""" """ -""" import argparse import logging import os diff --git a/examples/research_projects/performer/run_mlm_performer.py b/examples/research_projects/performer/run_mlm_performer.py index 4261d9c184..0332fe1575 100644 --- a/examples/research_projects/performer/run_mlm_performer.py +++ b/examples/research_projects/performer/run_mlm_performer.py @@ -19,6 +19,7 @@ text file or a dataset. Here is the full list of checkpoints on the hub that can be fine-tuned by this script: https://huggingface.co/models?filter=fill-mask """ + import logging import os import sys diff --git a/examples/research_projects/quantization-qdqbert/evaluate-hf-trt-qa.py b/examples/research_projects/quantization-qdqbert/evaluate-hf-trt-qa.py index 677b9c7860..7a8ea2109b 100755 --- a/examples/research_projects/quantization-qdqbert/evaluate-hf-trt-qa.py +++ b/examples/research_projects/quantization-qdqbert/evaluate-hf-trt-qa.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning the library models for question-answering on SQuAD (DistilBERT, Bert, XLM, XLNet).""" +"""Finetuning the library models for question-answering on SQuAD (DistilBERT, Bert, XLM, XLNet).""" + import argparse import logging import os diff --git a/examples/research_projects/quantization-qdqbert/quant_trainer.py b/examples/research_projects/quantization-qdqbert/quant_trainer.py index 09bac19e92..132aa28490 100755 --- a/examples/research_projects/quantization-qdqbert/quant_trainer.py +++ b/examples/research_projects/quantization-qdqbert/quant_trainer.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Helper functions for training models with pytorch-quantization""" + import logging import re diff --git a/examples/research_projects/quantization-qdqbert/utils_qa.py b/examples/research_projects/quantization-qdqbert/utils_qa.py index fd0bc16f7e..e90d6c4747 100644 --- a/examples/research_projects/quantization-qdqbert/utils_qa.py +++ b/examples/research_projects/quantization-qdqbert/utils_qa.py @@ -15,6 +15,7 @@ """ Post-processing utilities for question answering. 
""" + import collections import json import logging diff --git a/examples/research_projects/rag-end2end-retriever/eval_rag.py b/examples/research_projects/rag-end2end-retriever/eval_rag.py index a8e7abbca6..55f4da5657 100644 --- a/examples/research_projects/rag-end2end-retriever/eval_rag.py +++ b/examples/research_projects/rag-end2end-retriever/eval_rag.py @@ -1,4 +1,4 @@ -""" Evaluation script for RAG models.""" +"""Evaluation script for RAG models.""" import argparse import ast diff --git a/examples/research_projects/rag/eval_rag.py b/examples/research_projects/rag/eval_rag.py index a8e7abbca6..55f4da5657 100644 --- a/examples/research_projects/rag/eval_rag.py +++ b/examples/research_projects/rag/eval_rag.py @@ -1,4 +1,4 @@ -""" Evaluation script for RAG models.""" +"""Evaluation script for RAG models.""" import argparse import ast diff --git a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py index ebf33eb01d..0f7abde37e 100755 --- a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py +++ b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -""" Fine-tuning a 🤗 Transformers CTC model for automatic speech recognition""" +"""Fine-tuning a 🤗 Transformers CTC model for automatic speech recognition""" import functools import json diff --git a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py index 8a8eda851b..0fb567aba0 100644 --- a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py +++ b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -""" Fine-tuning a 🤗 Transformers CTC model for automatic speech recognition in streaming mode""" +"""Fine-tuning a 🤗 Transformers CTC model for automatic speech recognition in streaming mode""" import logging import os diff --git a/examples/research_projects/visual_bert/modeling_frcnn.py b/examples/research_projects/visual_bert/modeling_frcnn.py index 499de53207..8aea9b5e1a 100644 --- a/examples/research_projects/visual_bert/modeling_frcnn.py +++ b/examples/research_projects/visual_bert/modeling_frcnn.py @@ -1,20 +1,21 @@ """ - coding=utf-8 - Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal - Adapted From Facebook Inc, Detectron2 && Huggingface Co. +coding=utf-8 +Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal +Adapted From Facebook Inc, Detectron2 && Huggingface Co. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.import copy +""" - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License.import copy - """ import itertools import math import os diff --git a/examples/research_projects/visual_bert/processing_image.py b/examples/research_projects/visual_bert/processing_image.py index 4343cfdbce..65f8f6cd37 100644 --- a/examples/research_projects/visual_bert/processing_image.py +++ b/examples/research_projects/visual_bert/processing_image.py @@ -1,20 +1,21 @@ """ - coding=utf-8 - Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal - Adapted From Facebook Inc, Detectron2 +coding=utf-8 +Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal +Adapted From Facebook Inc, Detectron2 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.import copy +""" - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License.import copy - """ import sys from typing import Tuple diff --git a/examples/research_projects/visual_bert/utils.py b/examples/research_projects/visual_bert/utils.py index c75f523a08..995fbd2c19 100644 --- a/examples/research_projects/visual_bert/utils.py +++ b/examples/research_projects/visual_bert/utils.py @@ -1,20 +1,20 @@ """ - coding=utf-8 - Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal, Huggingface team :) - Adapted From Facebook Inc, Detectron2 +coding=utf-8 +Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal, Huggingface team :) +Adapted From Facebook Inc, Detectron2 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License.import copy - """ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.import copy +""" import copy import fnmatch diff --git a/examples/research_projects/visual_bert/visualizing_image.py b/examples/research_projects/visual_bert/visualizing_image.py index 163d661e87..dcfd8426ff 100644 --- a/examples/research_projects/visual_bert/visualizing_image.py +++ b/examples/research_projects/visual_bert/visualizing_image.py @@ -1,20 +1,21 @@ """ - coding=utf-8 - Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal - Adapted From Facebook Inc, Detectron2 +coding=utf-8 +Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal +Adapted From Facebook Inc, Detectron2 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.import copy +""" - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License.import copy - """ import colorsys import io diff --git a/examples/research_projects/xtreme-s/run_xtreme_s.py b/examples/research_projects/xtreme-s/run_xtreme_s.py index e01ccbf448..a467b3c6eb 100644 --- a/examples/research_projects/xtreme-s/run_xtreme_s.py +++ b/examples/research_projects/xtreme-s/run_xtreme_s.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and -""" Fine-tuning a 🤗 Transformers pretrained speech model on the XTREME-S benchmark tasks""" +"""Fine-tuning a 🤗 Transformers pretrained speech model on the XTREME-S benchmark tasks""" import json import logging diff --git a/examples/tensorflow/benchmarking/plot_csv_file.py b/examples/tensorflow/benchmarking/plot_csv_file.py index 9a9ad9c670..aa092f5c04 100644 --- a/examples/tensorflow/benchmarking/plot_csv_file.py +++ b/examples/tensorflow/benchmarking/plot_csv_file.py @@ -93,14 +93,14 @@ class Plot: self.result_dict[model_name]["seq_len"].append(int(row["sequence_length"])) if can_convert_to_int(row["result"]): # value is not None - self.result_dict[model_name]["result"][ - (int(row["batch_size"]), int(row["sequence_length"])) - ] = int(row["result"]) + self.result_dict[model_name]["result"][(int(row["batch_size"]), int(row["sequence_length"]))] = ( + int(row["result"]) + ) elif can_convert_to_float(row["result"]): # value is not None - self.result_dict[model_name]["result"][ - (int(row["batch_size"]), int(row["sequence_length"])) - ] = float(row["result"]) + self.result_dict[model_name]["result"][(int(row["batch_size"]), int(row["sequence_length"]))] = ( + float(row["result"]) + ) def plot(self): fig, ax = plt.subplots() diff --git a/examples/tensorflow/benchmarking/run_benchmark_tf.py b/examples/tensorflow/benchmarking/run_benchmark_tf.py index 25aabc5f51..4a43cf7d91 100755 --- a/examples/tensorflow/benchmarking/run_benchmark_tf.py +++ b/examples/tensorflow/benchmarking/run_benchmark_tf.py @@ -14,7 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Benchmarking the library on inference and training in TensorFlow""" +"""Benchmarking the library on inference and training in TensorFlow""" from transformers import HfArgumentParser, TensorFlowBenchmark, TensorFlowBenchmarkArguments diff --git a/examples/tensorflow/question-answering/utils_qa.py b/examples/tensorflow/question-answering/utils_qa.py index 23a46370d1..79497dbb81 100644 --- a/examples/tensorflow/question-answering/utils_qa.py +++ b/examples/tensorflow/question-answering/utils_qa.py @@ -15,6 +15,7 @@ """ Post-processing utilities for question answering. """ + import collections import json import logging diff --git a/examples/tensorflow/text-classification/run_glue.py b/examples/tensorflow/text-classification/run_glue.py index d21bd1ec46..9f3893e887 100644 --- a/examples/tensorflow/text-classification/run_glue.py +++ b/examples/tensorflow/text-classification/run_glue.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning the library models for sequence classification on GLUE.""" +"""Finetuning the library models for sequence classification on GLUE.""" # You can also adapt this script on your own text classification task. Pointers for this are left as comments. 
import json diff --git a/examples/tensorflow/text-classification/run_text_classification.py b/examples/tensorflow/text-classification/run_text_classification.py index e4f0644d12..379f367403 100644 --- a/examples/tensorflow/text-classification/run_text_classification.py +++ b/examples/tensorflow/text-classification/run_text_classification.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Fine-tuning the library models for sequence classification.""" +"""Fine-tuning the library models for sequence classification.""" # You can also adapt this script on your own text classification task. Pointers for this are left as comments. import json diff --git a/setup.py b/setup.py index efdb5342d0..9ad7c84b58 100644 --- a/setup.py +++ b/setup.py @@ -156,7 +156,7 @@ _deps = [ "rhoknp>=1.1.0,<1.3.1", "rjieba", "rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1", - "ruff==0.1.5", + "ruff==0.4.4", "sacrebleu>=1.4.12,<2.0.0", "sacremoses", "safetensors>=0.4.1", diff --git a/src/transformers/audio_utils.py b/src/transformers/audio_utils.py index c5c73550c1..4dc408bfa2 100644 --- a/src/transformers/audio_utils.py +++ b/src/transformers/audio_utils.py @@ -16,6 +16,7 @@ Audio processing functions to extract features from audio waveforms. This code is pure numpy to support all frameworks and remove unnecessary dependencies. """ + import warnings from typing import Optional, Tuple, Union diff --git a/src/transformers/benchmark/benchmark.py b/src/transformers/benchmark/benchmark.py index 3c5c877a45..3d4f588a5b 100644 --- a/src/transformers/benchmark/benchmark.py +++ b/src/transformers/benchmark/benchmark.py @@ -14,10 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - Benchmarking the library on inference and training in PyTorch. +Benchmarking the library on inference and training in PyTorch. """ - import timeit from typing import Callable, Optional diff --git a/src/transformers/benchmark/benchmark_tf.py b/src/transformers/benchmark/benchmark_tf.py index c813591be0..f680222955 100644 --- a/src/transformers/benchmark/benchmark_tf.py +++ b/src/transformers/benchmark/benchmark_tf.py @@ -14,10 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - Benchmarking the library on inference and training in PyTorch. +Benchmarking the library on inference and training in PyTorch. """ - import random import timeit from functools import wraps diff --git a/src/transformers/benchmark/benchmark_utils.py b/src/transformers/benchmark/benchmark_utils.py index a71b1fb65a..a721f98cfd 100644 --- a/src/transformers/benchmark/benchmark_utils.py +++ b/src/transformers/benchmark/benchmark_utils.py @@ -249,7 +249,6 @@ def measure_peak_memory_cpu(function: Callable[[], None], interval=0.5, device_i else: class MemoryMeasureProcess(Process): - """ `MemoryMeasureProcess` inherits from `Process` and overwrites its `run()` method. Used to measure the memory usage of a process diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index 86b946d162..b3d76aff9c 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" Configuration base class and utilities.""" - +"""Configuration base class and utilities.""" import copy import json diff --git a/src/transformers/convert_graph_to_onnx.py b/src/transformers/convert_graph_to_onnx.py index e3270bb9de..051f1d148a 100644 --- a/src/transformers/convert_graph_to_onnx.py +++ b/src/transformers/convert_graph_to_onnx.py @@ -400,7 +400,7 @@ def optimize(onnx_model_path: Path) -> Path: sess_option.optimized_model_filepath = opt_model_path.as_posix() _ = InferenceSession(onnx_model_path.as_posix(), sess_option) - print(f"Optimized model has been written at {opt_model_path}: \N{heavy check mark}") + print(f"Optimized model has been written at {opt_model_path}: \N{HEAVY CHECK MARK}") print("/!\\ Optimized model contains hardware specific operators which might not be portable. /!\\") return opt_model_path @@ -475,7 +475,7 @@ def quantize(onnx_model_path: Path) -> Path: quantized_model_path = generate_identified_filename(onnx_model_path, "-quantized") # Save model - print(f"Quantized model has been written at {quantized_model_path}: \N{heavy check mark}") + print(f"Quantized model has been written at {quantized_model_path}: \N{HEAVY CHECK MARK}") onnx.save_model(quantizer.model.model, quantized_model_path.as_posix()) return quantized_model_path @@ -489,9 +489,9 @@ def verify(path: Path): try: onnx_options = SessionOptions() _ = InferenceSession(path.as_posix(), onnx_options, providers=["CPUExecutionProvider"]) - print(f"Model {path} correctly loaded: \N{heavy check mark}") + print(f"Model {path} correctly loaded: \N{HEAVY CHECK MARK}") except RuntimeException as re: - print(f"Error while loading the model {re}: \N{heavy ballot x}") + print(f"Error while loading the model {re}: \N{HEAVY BALLOT X}") if __name__ == "__main__": diff --git a/src/transformers/convert_pytorch_checkpoint_to_tf2.py b/src/transformers/convert_pytorch_checkpoint_to_tf2.py index c544c8c9e1..3875879f0e 100755 --- a/src/transformers/convert_pytorch_checkpoint_to_tf2.py +++ b/src/transformers/convert_pytorch_checkpoint_to_tf2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Convert pytorch checkpoints to TensorFlow""" - +"""Convert pytorch checkpoints to TensorFlow""" import argparse import os diff --git a/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py b/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py index a032ee93b0..cddf18951d 100755 --- a/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py +++ b/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Convert slow tokenizers checkpoints in fast (serialization format of the `tokenizers` library)""" +"""Convert slow tokenizers checkpoints in fast (serialization format of the `tokenizers` library)""" import argparse import os diff --git a/src/transformers/convert_tf_hub_seq_to_seq_bert_to_pytorch.py b/src/transformers/convert_tf_hub_seq_to_seq_bert_to_pytorch.py index 2b003d4bc4..8ccb033b3d 100755 --- a/src/transformers/convert_tf_hub_seq_to_seq_bert_to_pytorch.py +++ b/src/transformers/convert_tf_hub_seq_to_seq_bert_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Seq2Seq TF Hub checkpoint.""" - import argparse from . import ( diff --git a/src/transformers/data/metrics/squad_metrics.py b/src/transformers/data/metrics/squad_metrics.py index 6eea34ad9e..5d98a0bfcf 100644 --- a/src/transformers/data/metrics/squad_metrics.py +++ b/src/transformers/data/metrics/squad_metrics.py @@ -20,7 +20,6 @@ additional na_prob.json file is provided. This file is expected to map question probability that a question is unanswerable. """ - import collections import json import math diff --git a/src/transformers/data/processors/glue.py b/src/transformers/data/processors/glue.py index 3d22968c9d..6e95a66684 100644 --- a/src/transformers/data/processors/glue.py +++ b/src/transformers/data/processors/glue.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" GLUE processors and helpers""" +"""GLUE processors and helpers""" import os import warnings diff --git a/src/transformers/data/processors/xnli.py b/src/transformers/data/processors/xnli.py index 3f1a11fcd6..459c5bc3a6 100644 --- a/src/transformers/data/processors/xnli.py +++ b/src/transformers/data/processors/xnli.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" XNLI utils (dataset loading and evaluation)""" - +"""XNLI utils (dataset loading and evaluation)""" import os diff --git a/src/transformers/deepspeed.py b/src/transformers/deepspeed.py index 840d9cc2f5..6fd22d8c5c 100644 --- a/src/transformers/deepspeed.py +++ b/src/transformers/deepspeed.py @@ -17,6 +17,7 @@ in `integrations/deepspeed` instead. Check: https://github.com/huggingface/transformers/pull/25599 """ + import warnings diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index ceca99d450..2e48650b9c 100644 --- a/src/transformers/dependency_versions_table.py +++ b/src/transformers/dependency_versions_table.py @@ -62,7 +62,7 @@ deps = { "rhoknp": "rhoknp>=1.1.0,<1.3.1", "rjieba": "rjieba", "rouge-score": "rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1", - "ruff": "ruff==0.1.5", + "ruff": "ruff==0.4.4", "sacrebleu": "sacrebleu>=1.4.12,<2.0.0", "sacremoses": "sacremoses", "safetensors": "safetensors>=0.4.1", diff --git a/src/transformers/dynamic_module_utils.py b/src/transformers/dynamic_module_utils.py index d80ffdc58c..9de22a3592 100644 --- a/src/transformers/dynamic_module_utils.py +++ b/src/transformers/dynamic_module_utils.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Utilities to dynamically load objects from the Hub.""" + import filecmp import importlib import importlib.util diff --git a/src/transformers/feature_extraction_sequence_utils.py b/src/transformers/feature_extraction_sequence_utils.py index 40717d9931..f74a3f0c40 100644 --- a/src/transformers/feature_extraction_sequence_utils.py +++ b/src/transformers/feature_extraction_sequence_utils.py @@ -13,8 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - Sequence feature extraction class for common feature extractors to preprocess sequences. +Sequence feature extraction class for common feature extractors to preprocess sequences. """ + from typing import Dict, List, Optional, Union import numpy as np diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py index 44e01f8d85..46125b8fa7 100644 --- a/src/transformers/feature_extraction_utils.py +++ b/src/transformers/feature_extraction_utils.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - Feature extraction saving/loading class for common feature extractors. +Feature extraction saving/loading class for common feature extractors. """ import copy diff --git a/src/transformers/generation/configuration_utils.py b/src/transformers/generation/configuration_utils.py index 9a5fee5207..eb14c60d9a 100644 --- a/src/transformers/generation/configuration_utils.py +++ b/src/transformers/generation/configuration_utils.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Generation configuration class and utilities.""" +"""Generation configuration class and utilities.""" import copy import json diff --git a/src/transformers/generation/stopping_criteria.py b/src/transformers/generation/stopping_criteria.py index 5fd1c2f773..14da9e697a 100644 --- a/src/transformers/generation/stopping_criteria.py +++ b/src/transformers/generation/stopping_criteria.py @@ -387,9 +387,9 @@ class StopStringCriteria(StoppingCriteria): # Since this is lots of very small assignments of lists, we build it with numpy rather # than torch for speed + simplicity, then convert to torch at the end for token_idx, valid_positions in positions.items(): - gather_vec[ - token_idx, max_valid_positions * i : max_valid_positions * i + len(valid_positions) - ] = valid_positions + gather_vec[token_idx, max_valid_positions * i : max_valid_positions * i + len(valid_positions)] = ( + valid_positions + ) for token_idx, possible_end_lens in end_lens.items(): gather_vec[ token_idx, diff --git a/src/transformers/generation/watermarking.py b/src/transformers/generation/watermarking.py index 297d388d54..e998d996ec 100644 --- a/src/transformers/generation/watermarking.py +++ b/src/transformers/generation/watermarking.py @@ -68,7 +68,6 @@ class WatermarkDetectorOutput: class WatermarkDetector: - r""" Detector for detection of watermark generated text. The detector needs to be given the exact same settings that were given during text generation to replicate the watermark greenlist generation and so detect the watermark. 
This includes diff --git a/src/transformers/integrations/aqlm.py b/src/transformers/integrations/aqlm.py index 903d0ecdae..ee4bbcb896 100644 --- a/src/transformers/integrations/aqlm.py +++ b/src/transformers/integrations/aqlm.py @@ -13,7 +13,6 @@ # limitations under the License. "AQLM (Additive Quantization of Language Model) integration file" - from ..utils import is_accelerate_available, is_aqlm_available, is_torch_available diff --git a/src/transformers/integrations/awq.py b/src/transformers/integrations/awq.py index a83b27e95a..30427aa405 100644 --- a/src/transformers/integrations/awq.py +++ b/src/transformers/integrations/awq.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. "AWQ (Activation aware Weight Quantization) integration file" + from ..activations import ACT2FN from ..modeling_utils import PreTrainedModel from ..utils import is_auto_awq_available, is_torch_available, logging diff --git a/src/transformers/integrations/deepspeed.py b/src/transformers/integrations/deepspeed.py index 4754c37a1e..aae1204acf 100644 --- a/src/transformers/integrations/deepspeed.py +++ b/src/transformers/integrations/deepspeed.py @@ -14,6 +14,7 @@ """ Integration with Deepspeed """ + import copy import importlib.metadata as importlib_metadata import importlib.util diff --git a/src/transformers/integrations/ggml.py b/src/transformers/integrations/ggml.py index 13660a0a81..3907d80a2a 100644 --- a/src/transformers/integrations/ggml.py +++ b/src/transformers/integrations/ggml.py @@ -17,6 +17,7 @@ Integration with GGML / The file is copied and adapted from https://github.com/99991/pygguf with extra methods beings exposed """ + from array import array import numpy as np diff --git a/src/transformers/integrations/integration_utils.py b/src/transformers/integrations/integration_utils.py index 39de7d6d32..66f8c1da8f 100755 --- a/src/transformers/integrations/integration_utils.py +++ b/src/transformers/integrations/integration_utils.py @@ -14,6 +14,7 @@ """ Integrations with other Python libraries. """ + import functools import importlib.metadata import importlib.util diff --git a/src/transformers/modelcard.py b/src/transformers/modelcard.py index 4776737a37..60394f569c 100644 --- a/src/transformers/modelcard.py +++ b/src/transformers/modelcard.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Configuration base class and utilities.""" - +"""Configuration base class and utilities.""" import copy import json diff --git a/src/transformers/modeling_flax_pytorch_utils.py b/src/transformers/modeling_flax_pytorch_utils.py index aceb462d12..4b61f7140c 100644 --- a/src/transformers/modeling_flax_pytorch_utils.py +++ b/src/transformers/modeling_flax_pytorch_utils.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch - Flax general utilities.""" - +"""PyTorch - Flax general utilities.""" import os from pickle import UnpicklingError diff --git a/src/transformers/modeling_tf_pytorch_utils.py b/src/transformers/modeling_tf_pytorch_utils.py index 163178929f..7f1367481a 100644 --- a/src/transformers/modeling_tf_pytorch_utils.py +++ b/src/transformers/modeling_tf_pytorch_utils.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch - TF 2.0 general utilities.""" - +"""PyTorch - TF 2.0 general utilities.""" import os import re diff --git a/src/transformers/models/albert/configuration_albert.py b/src/transformers/models/albert/configuration_albert.py index 492ca2f65b..bae88486e1 100644 --- a/src/transformers/models/albert/configuration_albert.py +++ b/src/transformers/models/albert/configuration_albert.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ALBERT model configuration""" +"""ALBERT model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py index eecada8b43..df2a226101 100644 --- a/src/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert ALBERT checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py index ad2c2d637a..3a50eeb20e 100644 --- a/src/transformers/models/albert/modeling_tf_albert.py +++ b/src/transformers/models/albert/modeling_tf_albert.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 ALBERT model.""" - +"""TF 2.0 ALBERT model.""" from __future__ import annotations diff --git a/src/transformers/models/albert/tokenization_albert.py b/src/transformers/models/albert/tokenization_albert.py index 786f9eeafc..4068c7aad8 100644 --- a/src/transformers/models/albert/tokenization_albert.py +++ b/src/transformers/models/albert/tokenization_albert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization classes for ALBERT model.""" - +"""Tokenization classes for ALBERT model.""" import os import unicodedata diff --git a/src/transformers/models/albert/tokenization_albert_fast.py b/src/transformers/models/albert/tokenization_albert_fast.py index e0b09a7356..eadfdcecfc 100644 --- a/src/transformers/models/albert/tokenization_albert_fast.py +++ b/src/transformers/models/albert/tokenization_albert_fast.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Tokenization classes for ALBERT model.""" - +"""Tokenization classes for ALBERT model.""" import os from shutil import copyfile diff --git a/src/transformers/models/align/configuration_align.py b/src/transformers/models/align/configuration_align.py index 9e96f5d15a..199b51153a 100644 --- a/src/transformers/models/align/configuration_align.py +++ b/src/transformers/models/align/configuration_align.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ALIGN model configuration""" +"""ALIGN model configuration""" import os from typing import TYPE_CHECKING, List, Union diff --git a/src/transformers/models/align/modeling_align.py b/src/transformers/models/align/modeling_align.py index 08de9aa14f..d6e6023a26 100644 --- a/src/transformers/models/align/modeling_align.py +++ b/src/transformers/models/align/modeling_align.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ALIGN model.""" +"""PyTorch ALIGN model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/align/processing_align.py b/src/transformers/models/align/processing_align.py index 8bcea7eb5d..3bc97afd1c 100644 --- a/src/transformers/models/align/processing_align.py +++ b/src/transformers/models/align/processing_align.py @@ -16,7 +16,6 @@ Image/Text processor class for ALIGN """ - from ...processing_utils import ProcessorMixin from ...tokenization_utils_base import BatchEncoding diff --git a/src/transformers/models/altclip/configuration_altclip.py b/src/transformers/models/altclip/configuration_altclip.py index d6e533e1f8..3195d43e0b 100755 --- a/src/transformers/models/altclip/configuration_altclip.py +++ b/src/transformers/models/altclip/configuration_altclip.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" AltCLIP model configuration""" +"""AltCLIP model configuration""" + import os from typing import Union diff --git a/src/transformers/models/altclip/modeling_altclip.py b/src/transformers/models/altclip/modeling_altclip.py index dfccab53ea..6bffdc70a5 100755 --- a/src/transformers/models/altclip/modeling_altclip.py +++ b/src/transformers/models/altclip/modeling_altclip.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch AltCLIP model.""" +"""PyTorch AltCLIP model.""" + import math from dataclasses import dataclass from typing import Any, List, Optional, Tuple, Union diff --git a/src/transformers/models/altclip/processing_altclip.py b/src/transformers/models/altclip/processing_altclip.py index 9518c55d40..2814b2d7f2 100644 --- a/src/transformers/models/altclip/processing_altclip.py +++ b/src/transformers/models/altclip/processing_altclip.py @@ -15,6 +15,7 @@ """ Image/Text processor class for AltCLIP """ + import warnings from ...processing_utils import ProcessorMixin diff --git a/src/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py b/src/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py index 158f1ee5d9..9e1d995dc2 100644 --- a/src/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +++ b/src/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Audio Spectogram Transformer (AST) model configuration""" - +"""Audio Spectogram Transformer (AST) model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/audio_spectrogram_transformer/convert_audio_spectrogram_transformer_original_to_pytorch.py b/src/transformers/models/audio_spectrogram_transformer/convert_audio_spectrogram_transformer_original_to_pytorch.py index 32e0f33d04..2f75d07592 100644 --- a/src/transformers/models/audio_spectrogram_transformer/convert_audio_spectrogram_transformer_original_to_pytorch.py +++ b/src/transformers/models/audio_spectrogram_transformer/convert_audio_spectrogram_transformer_original_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Audio Spectrogram Transformer checkpoints from the original repository. URL: https://github.com/YuanGongND/ast""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py b/src/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py index 523ab85f14..beb249202b 100644 --- a/src/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +++ b/src/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Audio Spectrogram Transformer (AST) model.""" +"""PyTorch Audio Spectrogram Transformer (AST) model.""" import math from typing import Dict, List, Optional, Set, Tuple, Union diff --git a/src/transformers/models/auto/auto_factory.py b/src/transformers/models/auto/auto_factory.py index de813c43cf..6b572b2527 100644 --- a/src/transformers/models/auto/auto_factory.py +++ b/src/transformers/models/auto/auto_factory.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Factory function to build auto-model classes.""" + import copy import importlib import json diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py index d48b4a2005..464f80e2d9 100755 --- a/src/transformers/models/auto/configuration_auto.py +++ b/src/transformers/models/auto/configuration_auto.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Auto Config class.""" +"""Auto Config class.""" + import importlib import os import re diff --git a/src/transformers/models/auto/feature_extraction_auto.py b/src/transformers/models/auto/feature_extraction_auto.py index 1dbb4eb7dc..34cb1824c1 100644 --- a/src/transformers/models/auto/feature_extraction_auto.py +++ b/src/transformers/models/auto/feature_extraction_auto.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" AutoFeatureExtractor class.""" +"""AutoFeatureExtractor class.""" + import importlib import json import os diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py index 41a1fcf960..eb21b58e20 100644 --- a/src/transformers/models/auto/image_processing_auto.py +++ b/src/transformers/models/auto/image_processing_auto.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" AutoImageProcessor class.""" +"""AutoImageProcessor class.""" + import importlib import json import os diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index c16ab83bdc..adfcc7af9f 100755 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Auto Model class.""" +"""Auto Model class.""" import warnings from collections import OrderedDict diff --git a/src/transformers/models/auto/modeling_flax_auto.py b/src/transformers/models/auto/modeling_flax_auto.py index f8e62bf0f2..310cf5b287 100644 --- a/src/transformers/models/auto/modeling_flax_auto.py +++ b/src/transformers/models/auto/modeling_flax_auto.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Auto Model class.""" - +"""Auto Model class.""" from collections import OrderedDict diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py index 756da20dbc..2004495756 100644 --- a/src/transformers/models/auto/modeling_tf_auto.py +++ b/src/transformers/models/auto/modeling_tf_auto.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Auto Model class.""" - +"""Auto Model class.""" import warnings from collections import OrderedDict diff --git a/src/transformers/models/auto/processing_auto.py b/src/transformers/models/auto/processing_auto.py index 5b5ef98bdc..4a8295cc83 100644 --- a/src/transformers/models/auto/processing_auto.py +++ b/src/transformers/models/auto/processing_auto.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" AutoProcessor class.""" +"""AutoProcessor class.""" + import importlib import inspect import json diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py index ff323ff388..e99bc89205 100644 --- a/src/transformers/models/auto/tokenization_auto.py +++ b/src/transformers/models/auto/tokenization_auto.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Auto Tokenizer class.""" +"""Auto Tokenizer class.""" import importlib import json diff --git a/src/transformers/models/autoformer/configuration_autoformer.py b/src/transformers/models/autoformer/configuration_autoformer.py index 4f499fd1cb..09b06f95c3 100644 --- a/src/transformers/models/autoformer/configuration_autoformer.py +++ b/src/transformers/models/autoformer/configuration_autoformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Autoformer model configuration""" +"""Autoformer model configuration""" from typing import List, Optional diff --git a/src/transformers/models/autoformer/modeling_autoformer.py b/src/transformers/models/autoformer/modeling_autoformer.py index d7e1d7a4a3..5a5b5f2439 100644 --- a/src/transformers/models/autoformer/modeling_autoformer.py +++ b/src/transformers/models/autoformer/modeling_autoformer.py @@ -14,7 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Autoformer model.""" +"""PyTorch Autoformer model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/bark/configuration_bark.py b/src/transformers/models/bark/configuration_bark.py index a6bf2b546a..6dd08b65e8 100644 --- a/src/transformers/models/bark/configuration_bark.py +++ b/src/transformers/models/bark/configuration_bark.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" BARK model configuration""" +"""BARK model configuration""" import os from typing import Dict, Optional, Union diff --git a/src/transformers/models/bark/convert_suno_to_hf.py b/src/transformers/models/bark/convert_suno_to_hf.py index 4720a70d5c..880debe60a 100644 --- a/src/transformers/models/bark/convert_suno_to_hf.py +++ b/src/transformers/models/bark/convert_suno_to_hf.py @@ -1,4 +1,5 @@ """Convert Bark checkpoint.""" + import argparse import os from pathlib import Path diff --git a/src/transformers/models/bark/generation_configuration_bark.py b/src/transformers/models/bark/generation_configuration_bark.py index 7d7d98449d..b03fd6796a 100644 --- a/src/transformers/models/bark/generation_configuration_bark.py +++ b/src/transformers/models/bark/generation_configuration_bark.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" BARK model generation configuration""" +"""BARK model generation configuration""" import copy from typing import Dict diff --git a/src/transformers/models/bark/modeling_bark.py b/src/transformers/models/bark/modeling_bark.py index 0690c96e6a..9a9fa33d97 100644 --- a/src/transformers/models/bark/modeling_bark.py +++ b/src/transformers/models/bark/modeling_bark.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch BARK model.""" +"""PyTorch BARK model.""" + import math from typing import Dict, Optional, Tuple, Union diff --git a/src/transformers/models/bark/processing_bark.py b/src/transformers/models/bark/processing_bark.py index bb7690f9a1..a9bf55b51f 100644 --- a/src/transformers/models/bark/processing_bark.py +++ b/src/transformers/models/bark/processing_bark.py @@ -15,6 +15,7 @@ """ Processor class for Bark """ + import json import os from typing import Optional diff --git a/src/transformers/models/bart/configuration_bart.py b/src/transformers/models/bart/configuration_bart.py index 1a6214c2ee..a3bc7f3865 100644 --- a/src/transformers/models/bart/configuration_bart.py +++ b/src/transformers/models/bart/configuration_bart.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" BART model configuration""" +"""BART model configuration""" + import warnings from collections import OrderedDict from typing import Any, Mapping, Optional diff --git a/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py index d09b39d51e..e694d96ca0 100644 --- a/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert BART checkpoint.""" - import argparse import os from pathlib import Path diff --git a/src/transformers/models/bart/modeling_bart.py b/src/transformers/models/bart/modeling_bart.py index bcc9ac3e07..e3b2f8a61b 100755 --- a/src/transformers/models/bart/modeling_bart.py +++ b/src/transformers/models/bart/modeling_bart.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch BART model.""" +"""PyTorch BART model.""" + import copy import math import warnings diff --git a/src/transformers/models/bart/modeling_flax_bart.py b/src/transformers/models/bart/modeling_flax_bart.py index 6abfcdc398..507a93a8e7 100644 --- a/src/transformers/models/bart/modeling_flax_bart.py +++ b/src/transformers/models/bart/modeling_flax_bart.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax Bart model.""" +"""Flax Bart model.""" import math import random diff --git a/src/transformers/models/bart/modeling_tf_bart.py b/src/transformers/models/bart/modeling_tf_bart.py index 1e38908b4a..5ebde8cba6 100644 --- a/src/transformers/models/bart/modeling_tf_bart.py +++ b/src/transformers/models/bart/modeling_tf_bart.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 Bart model.""" - +"""TF 2.0 Bart model.""" from __future__ import annotations diff --git a/src/transformers/models/barthez/tokenization_barthez.py b/src/transformers/models/barthez/tokenization_barthez.py index d9bd67cf51..46decddb3e 100644 --- a/src/transformers/models/barthez/tokenization_barthez.py +++ b/src/transformers/models/barthez/tokenization_barthez.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License -""" Tokenization classes for the BARThez model.""" - +"""Tokenization classes for the BARThez model.""" import os from shutil import copyfile diff --git a/src/transformers/models/barthez/tokenization_barthez_fast.py b/src/transformers/models/barthez/tokenization_barthez_fast.py index e988b0d518..df8cc7757e 100644 --- a/src/transformers/models/barthez/tokenization_barthez_fast.py +++ b/src/transformers/models/barthez/tokenization_barthez_fast.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License -""" Tokenization classes for the BARThez model.""" - +"""Tokenization classes for the BARThez model.""" import os from shutil import copyfile diff --git a/src/transformers/models/bartpho/tokenization_bartpho.py b/src/transformers/models/bartpho/tokenization_bartpho.py index d936be41c2..df121f26e2 100644 --- a/src/transformers/models/bartpho/tokenization_bartpho.py +++ b/src/transformers/models/bartpho/tokenization_bartpho.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License -""" Tokenization classes for BARTpho-syllable model.""" - +"""Tokenization classes for BARTpho-syllable model.""" import os from shutil import copyfile diff --git a/src/transformers/models/beit/configuration_beit.py b/src/transformers/models/beit/configuration_beit.py index 4664bdc898..6ff00b2b87 100644 --- a/src/transformers/models/beit/configuration_beit.py +++ b/src/transformers/models/beit/configuration_beit.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" BEiT model configuration""" +"""BEiT model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/beit/convert_beit_unilm_to_pytorch.py b/src/transformers/models/beit/convert_beit_unilm_to_pytorch.py index 757113c8a6..c2e366d7dd 100644 --- a/src/transformers/models/beit/convert_beit_unilm_to_pytorch.py +++ b/src/transformers/models/beit/convert_beit_unilm_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert BEiT checkpoints from the unilm repository.""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/beit/modeling_beit.py b/src/transformers/models/beit/modeling_beit.py index d6c5a164d7..a9b38d4ee3 100755 --- a/src/transformers/models/beit/modeling_beit.py +++ b/src/transformers/models/beit/modeling_beit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch BEiT model.""" - +"""PyTorch BEiT model.""" import collections.abc import math diff --git a/src/transformers/models/bert/configuration_bert.py b/src/transformers/models/bert/configuration_bert.py index 7ad3536c96..613cf6a114 100644 --- a/src/transformers/models/bert/configuration_bert.py +++ b/src/transformers/models/bert/configuration_bert.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" BERT model configuration""" +"""BERT model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py b/src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py index 40533ede43..9dfd8da474 100644 --- a/src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py +++ b/src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py @@ -24,6 +24,7 @@ You may adapt this script to include classification/MLM/NSP/etc. heads. Note: This script is only working with an older version of the TensorFlow models repository (<= v2.3.0). Models trained with never versions are not compatible with this script. """ + import argparse import os import re diff --git a/src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py index 09c4e3ee6c..be904ddd7e 100755 --- a/src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert BERT checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/bert/convert_bert_token_dropping_original_tf2_checkpoint_to_pytorch.py b/src/transformers/models/bert/convert_bert_token_dropping_original_tf2_checkpoint_to_pytorch.py index 651847aee7..cba1e1a2c3 100644 --- a/src/transformers/models/bert/convert_bert_token_dropping_original_tf2_checkpoint_to_pytorch.py +++ b/src/transformers/models/bert/convert_bert_token_dropping_original_tf2_checkpoint_to_pytorch.py @@ -18,6 +18,7 @@ model. 
The official implementation of "Token Dropping" can be found in the Tenso https://github.com/tensorflow/models/tree/master/official/projects/token_dropping """ + import argparse import tensorflow as tf diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py index 89885887c9..16dc2fc205 100644 --- a/src/transformers/models/bert/modeling_tf_bert.py +++ b/src/transformers/models/bert/modeling_tf_bert.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 BERT model.""" - +"""TF 2.0 BERT model.""" from __future__ import annotations diff --git a/src/transformers/models/bert/tokenization_bert.py b/src/transformers/models/bert/tokenization_bert.py index f645d7c08a..a8f1274663 100644 --- a/src/transformers/models/bert/tokenization_bert.py +++ b/src/transformers/models/bert/tokenization_bert.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for Bert.""" - import collections import os import unicodedata diff --git a/src/transformers/models/bert_generation/configuration_bert_generation.py b/src/transformers/models/bert_generation/configuration_bert_generation.py index 841aec5c0f..d1d1b51b65 100644 --- a/src/transformers/models/bert_generation/configuration_bert_generation.py +++ b/src/transformers/models/bert_generation/configuration_bert_generation.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" BertGeneration model configuration""" +"""BertGeneration model configuration""" from ...configuration_utils import PretrainedConfig diff --git a/src/transformers/models/bert_generation/tokenization_bert_generation.py b/src/transformers/models/bert_generation/tokenization_bert_generation.py index 772eb123c3..b1adb9b62b 100644 --- a/src/transformers/models/bert_generation/tokenization_bert_generation.py +++ b/src/transformers/models/bert_generation/tokenization_bert_generation.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for model BertGeneration.""" - +"""Tokenization class for model BertGeneration.""" import os from shutil import copyfile diff --git a/src/transformers/models/bert_japanese/tokenization_bert_japanese.py b/src/transformers/models/bert_japanese/tokenization_bert_japanese.py index fe5cd06f7f..58ff3d2b83 100644 --- a/src/transformers/models/bert_japanese/tokenization_bert_japanese.py +++ b/src/transformers/models/bert_japanese/tokenization_bert_japanese.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes.""" - import collections import copy import os diff --git a/src/transformers/models/bertweet/tokenization_bertweet.py b/src/transformers/models/bertweet/tokenization_bertweet.py index 7f14ed61da..f478dd0832 100644 --- a/src/transformers/models/bertweet/tokenization_bertweet.py +++ b/src/transformers/models/bertweet/tokenization_bertweet.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Tokenization classes for BERTweet""" - +"""Tokenization classes for BERTweet""" import html import os diff --git a/src/transformers/models/big_bird/configuration_big_bird.py b/src/transformers/models/big_bird/configuration_big_bird.py index dfd36d82c3..cbcf2e6bf5 100644 --- a/src/transformers/models/big_bird/configuration_big_bird.py +++ b/src/transformers/models/big_bird/configuration_big_bird.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" BigBird model configuration""" +"""BigBird model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py index 34db9771b1..0b8e6590f9 100644 --- a/src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert BigBird checkpoint.""" - import argparse from transformers import BigBirdConfig, BigBirdForPreTraining, BigBirdForQuestionAnswering, load_tf_weights_in_big_bird diff --git a/src/transformers/models/big_bird/modeling_big_bird.py b/src/transformers/models/big_bird/modeling_big_bird.py index 39144c77e2..f73ab9e51f 100755 --- a/src/transformers/models/big_bird/modeling_big_bird.py +++ b/src/transformers/models/big_bird/modeling_big_bird.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch BigBird model.""" - +"""PyTorch BigBird model.""" import math import os @@ -919,11 +918,9 @@ class BigBirdBlockSparseAttention(nn.Module): attention_probs[:, :, -2 * from_block_size : -from_block_size, :to_block_size] = second_last_attn_weights[ :, :, :, :to_block_size ] # 1st key block (global) - attention_probs[ - :, :, -2 * from_block_size : -from_block_size, -3 * to_block_size : - ] = second_last_attn_weights[ - :, :, :, to_block_size : 4 * to_block_size - ] # last three blocks (global + sliding) + attention_probs[:, :, -2 * from_block_size : -from_block_size, -3 * to_block_size :] = ( + second_last_attn_weights[:, :, :, to_block_size : 4 * to_block_size] + ) # last three blocks (global + sliding) # random keys for p1, i1, w1 in zip(range(bsz), rand_attn, second_last_attn_weights): # p1, i1, w1 corresponds to batch_dim i.e. following operation is done for each sequence in batch diff --git a/src/transformers/models/big_bird/tokenization_big_bird.py b/src/transformers/models/big_bird/tokenization_big_bird.py index 58dc57ef6d..e435477ef3 100644 --- a/src/transformers/models/big_bird/tokenization_big_bird.py +++ b/src/transformers/models/big_bird/tokenization_big_bird.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Tokenization classes for BigBird.""" - import os import re from shutil import copyfile diff --git a/src/transformers/models/big_bird/tokenization_big_bird_fast.py b/src/transformers/models/big_bird/tokenization_big_bird_fast.py index fa37cd4ac7..f4ccbb8b17 100644 --- a/src/transformers/models/big_bird/tokenization_big_bird_fast.py +++ b/src/transformers/models/big_bird/tokenization_big_bird_fast.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization classes for Big Bird model.""" - +"""Tokenization classes for Big Bird model.""" import os from shutil import copyfile diff --git a/src/transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py index c548573f32..9de2a7267a 100644 --- a/src/transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py +++ b/src/transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" BigBirdPegasus model configuration""" +"""BigBirdPegasus model configuration""" from collections import OrderedDict from typing import Any, Mapping, Optional diff --git a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py index b4e6419f99..d1ba54213a 100755 --- a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +++ b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch BigBirdPegasus model.""" +"""PyTorch BigBirdPegasus model.""" import copy import math @@ -717,11 +717,9 @@ class BigBirdPegasusBlockSparseAttention(nn.Module): attention_probs[:, :, -2 * from_block_size : -from_block_size, :to_block_size] = second_last_attn_weights[ :, :, :, :to_block_size ] # 1st key block (global) - attention_probs[ - :, :, -2 * from_block_size : -from_block_size, -3 * to_block_size : - ] = second_last_attn_weights[ - :, :, :, to_block_size : 4 * to_block_size - ] # last three blocks (global + sliding) + attention_probs[:, :, -2 * from_block_size : -from_block_size, -3 * to_block_size :] = ( + second_last_attn_weights[:, :, :, to_block_size : 4 * to_block_size] + ) # last three blocks (global + sliding) # random keys for p1, i1, w1 in zip(range(bsz), rand_attn, second_last_attn_weights): # p1, i1, w1 corresponds to batch_dim i.e. following operation is done for each sequence in batch diff --git a/src/transformers/models/biogpt/configuration_biogpt.py b/src/transformers/models/biogpt/configuration_biogpt.py index 936fee7632..18f7b6d6bf 100644 --- a/src/transformers/models/biogpt/configuration_biogpt.py +++ b/src/transformers/models/biogpt/configuration_biogpt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" BioGPT model configuration""" +"""BioGPT model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/biogpt/modeling_biogpt.py b/src/transformers/models/biogpt/modeling_biogpt.py index ad1a5df715..ae46a6ff07 100755 --- a/src/transformers/models/biogpt/modeling_biogpt.py +++ b/src/transformers/models/biogpt/modeling_biogpt.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch BioGPT model.""" - +"""PyTorch BioGPT model.""" import math from typing import Optional, Tuple, Union diff --git a/src/transformers/models/biogpt/tokenization_biogpt.py b/src/transformers/models/biogpt/tokenization_biogpt.py index e16742ec5a..f9760eb604 100644 --- a/src/transformers/models/biogpt/tokenization_biogpt.py +++ b/src/transformers/models/biogpt/tokenization_biogpt.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for BioGPT.""" + import json import os from typing import List, Optional, Tuple diff --git a/src/transformers/models/bit/configuration_bit.py b/src/transformers/models/bit/configuration_bit.py index f1532a74b9..8f4326a2d5 100644 --- a/src/transformers/models/bit/configuration_bit.py +++ b/src/transformers/models/bit/configuration_bit.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" BiT model configuration""" +"""BiT model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/bit/convert_bit_to_pytorch.py b/src/transformers/models/bit/convert_bit_to_pytorch.py index 7cc7f64107..abc24290ab 100644 --- a/src/transformers/models/bit/convert_bit_to_pytorch.py +++ b/src/transformers/models/bit/convert_bit_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert BiT checkpoints from the timm library.""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/bit/modeling_bit.py b/src/transformers/models/bit/modeling_bit.py index af96150dc9..d015db4956 100644 --- a/src/transformers/models/bit/modeling_bit.py +++ b/src/transformers/models/bit/modeling_bit.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch BiT model. Also supports backbone for ViT hybrid.""" +"""PyTorch BiT model. Also supports backbone for ViT hybrid.""" import collections import math diff --git a/src/transformers/models/blenderbot/configuration_blenderbot.py b/src/transformers/models/blenderbot/configuration_blenderbot.py index 6b9a12e02e..105d38c255 100644 --- a/src/transformers/models/blenderbot/configuration_blenderbot.py +++ b/src/transformers/models/blenderbot/configuration_blenderbot.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Blenderbot model configuration""" +"""Blenderbot model configuration""" from collections import OrderedDict from typing import Any, Mapping, Optional diff --git a/src/transformers/models/blenderbot/modeling_blenderbot.py b/src/transformers/models/blenderbot/modeling_blenderbot.py index 6fc86bcec4..12d259fde7 100755 --- a/src/transformers/models/blenderbot/modeling_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_blenderbot.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Blenderbot model.""" - +"""PyTorch Blenderbot model.""" import copy import math diff --git a/src/transformers/models/blenderbot/modeling_flax_blenderbot.py b/src/transformers/models/blenderbot/modeling_flax_blenderbot.py index 61239335be..97c9653da3 100644 --- a/src/transformers/models/blenderbot/modeling_flax_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_flax_blenderbot.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax Blenderbot model.""" +"""Flax Blenderbot model.""" import math import random diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py index ccb07d20ec..bbfe4726de 100644 --- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 Blenderbot model.""" - +"""TF 2.0 Blenderbot model.""" from __future__ import annotations diff --git a/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py b/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py index 3a5206cdf4..01cbf13809 100644 --- a/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py +++ b/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Fast Tokenization class for Blenderbot.""" + import json from typing import List, Optional, Tuple diff --git a/src/transformers/models/blenderbot_small/configuration_blenderbot_small.py b/src/transformers/models/blenderbot_small/configuration_blenderbot_small.py index 667db5bd55..6ee26365de 100644 --- a/src/transformers/models/blenderbot_small/configuration_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/configuration_blenderbot_small.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" BlenderbotSmall model configuration""" +"""BlenderbotSmall model configuration""" from collections import OrderedDict from typing import Any, Mapping, Optional diff --git a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py index 504f073ed0..aa0e38bd8e 100755 --- a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch BlenderbotSmall model.""" - +"""PyTorch BlenderbotSmall model.""" import copy import math diff --git a/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py index b5272fb3bc..325ff0a20b 100644 --- a/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax BlenderbotSmall model.""" - +"""Flax BlenderbotSmall model.""" import math import random diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index 01206831ac..1576462979 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 BlenderbotSmall model.""" - +"""TF 2.0 BlenderbotSmall model.""" from __future__ import annotations diff --git a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py index 1f647d2430..a80acdb650 100644 --- a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py +++ b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Fast tokenization class for BlenderbotSmall.""" + from typing import List, Optional from tokenizers import ByteLevelBPETokenizer diff --git a/src/transformers/models/blip/configuration_blip.py b/src/transformers/models/blip/configuration_blip.py index 1a6fe37aa4..1131d598e0 100644 --- a/src/transformers/models/blip/configuration_blip.py +++ b/src/transformers/models/blip/configuration_blip.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Blip model configuration""" +"""Blip model configuration""" import os from typing import Union diff --git a/src/transformers/models/blip/modeling_blip.py b/src/transformers/models/blip/modeling_blip.py index 576d4dd5c0..371affa5ac 100644 --- a/src/transformers/models/blip/modeling_blip.py +++ b/src/transformers/models/blip/modeling_blip.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch BLIP model.""" +"""PyTorch BLIP model.""" import math import warnings diff --git a/src/transformers/models/blip/modeling_tf_blip.py b/src/transformers/models/blip/modeling_tf_blip.py index 5312cf2323..1557677eb3 100644 --- a/src/transformers/models/blip/modeling_tf_blip.py +++ b/src/transformers/models/blip/modeling_tf_blip.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TensorFlow BLIP model.""" +"""TensorFlow BLIP model.""" from __future__ import annotations diff --git a/src/transformers/models/blip_2/configuration_blip_2.py b/src/transformers/models/blip_2/configuration_blip_2.py index 70dea87d35..14346d5299 100644 --- a/src/transformers/models/blip_2/configuration_blip_2.py +++ b/src/transformers/models/blip_2/configuration_blip_2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" BLIP-2 model configuration""" +"""BLIP-2 model configuration""" import os from typing import Union diff --git a/src/transformers/models/blip_2/modeling_blip_2.py b/src/transformers/models/blip_2/modeling_blip_2.py index 8986eabc7d..8fa55d01ee 100644 --- a/src/transformers/models/blip_2/modeling_blip_2.py +++ b/src/transformers/models/blip_2/modeling_blip_2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch BLIP-2 model.""" +"""PyTorch BLIP-2 model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/bloom/configuration_bloom.py b/src/transformers/models/bloom/configuration_bloom.py index ddea3f720a..dc9f6d3082 100644 --- a/src/transformers/models/bloom/configuration_bloom.py +++ b/src/transformers/models/bloom/configuration_bloom.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Bloom configuration""" +"""Bloom configuration""" + from collections import OrderedDict from typing import TYPE_CHECKING, Any, List, Mapping, Optional diff --git a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py index eda9a2d815..40ba6240d3 100644 --- a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py +++ b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Convert BigScience BLOOM checkpoint.""" - import argparse import json import os diff --git a/src/transformers/models/bloom/tokenization_bloom_fast.py b/src/transformers/models/bloom/tokenization_bloom_fast.py index afd3bfaaee..d0da1621d4 100644 --- a/src/transformers/models/bloom/tokenization_bloom_fast.py +++ b/src/transformers/models/bloom/tokenization_bloom_fast.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for Bloom.""" - import pickle from typing import Optional, Tuple diff --git a/src/transformers/models/bridgetower/configuration_bridgetower.py b/src/transformers/models/bridgetower/configuration_bridgetower.py index 8513ce21f7..4985b6ef89 100644 --- a/src/transformers/models/bridgetower/configuration_bridgetower.py +++ b/src/transformers/models/bridgetower/configuration_bridgetower.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND=, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" BridgeTower model configuration""" +"""BridgeTower model configuration""" import os from typing import Union diff --git a/src/transformers/models/bros/configuration_bros.py b/src/transformers/models/bros/configuration_bros.py index 6a1ef6d948..8c2a3cc73a 100644 --- a/src/transformers/models/bros/configuration_bros.py +++ b/src/transformers/models/bros/configuration_bros.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Bros model configuration""" +"""Bros model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/bros/modeling_bros.py b/src/transformers/models/bros/modeling_bros.py index 0f80cec404..c062278309 100755 --- a/src/transformers/models/bros/modeling_bros.py +++ b/src/transformers/models/bros/modeling_bros.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Bros model.""" - +"""PyTorch Bros model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/byt5/convert_byt5_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/byt5/convert_byt5_original_tf_checkpoint_to_pytorch.py index 7d9a20f3b0..9b1b15857c 100755 --- a/src/transformers/models/byt5/convert_byt5_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/byt5/convert_byt5_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert T5 checkpoint.""" - import argparse from transformers import T5Config, T5ForConditionalGeneration, load_tf_weights_in_t5 diff --git a/src/transformers/models/byt5/tokenization_byt5.py b/src/transformers/models/byt5/tokenization_byt5.py index 68c70db0d1..21513ab4cd 100644 --- a/src/transformers/models/byt5/tokenization_byt5.py +++ b/src/transformers/models/byt5/tokenization_byt5.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Tokenization class for model ByT5.""" - +"""Tokenization class for model ByT5.""" import warnings from typing import List, Optional, Tuple diff --git a/src/transformers/models/camembert/configuration_camembert.py b/src/transformers/models/camembert/configuration_camembert.py index 124d14abec..b573801200 100644 --- a/src/transformers/models/camembert/configuration_camembert.py +++ b/src/transformers/models/camembert/configuration_camembert.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" CamemBERT configuration""" +"""CamemBERT configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/camembert/modeling_tf_camembert.py b/src/transformers/models/camembert/modeling_tf_camembert.py index 9e66f12468..f5ddc2242b 100644 --- a/src/transformers/models/camembert/modeling_tf_camembert.py +++ b/src/transformers/models/camembert/modeling_tf_camembert.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 CamemBERT model.""" - +"""TF 2.0 CamemBERT model.""" from __future__ import annotations diff --git a/src/transformers/models/camembert/tokenization_camembert.py b/src/transformers/models/camembert/tokenization_camembert.py index 51d70b198b..113fe1b121 100644 --- a/src/transformers/models/camembert/tokenization_camembert.py +++ b/src/transformers/models/camembert/tokenization_camembert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License -""" Tokenization classes for Camembert model.""" - +"""Tokenization classes for Camembert model.""" import os from shutil import copyfile diff --git a/src/transformers/models/camembert/tokenization_camembert_fast.py b/src/transformers/models/camembert/tokenization_camembert_fast.py index d1f0db688a..ffec8d98e1 100644 --- a/src/transformers/models/camembert/tokenization_camembert_fast.py +++ b/src/transformers/models/camembert/tokenization_camembert_fast.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License -""" Fast tokenization classes for Camembert model.""" - +"""Fast tokenization classes for Camembert model.""" import os from shutil import copyfile diff --git a/src/transformers/models/canine/configuration_canine.py b/src/transformers/models/canine/configuration_canine.py index e3d2d1373b..9add399112 100644 --- a/src/transformers/models/canine/configuration_canine.py +++ b/src/transformers/models/canine/configuration_canine.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" CANINE model configuration""" +"""CANINE model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/canine/convert_canine_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/canine/convert_canine_original_tf_checkpoint_to_pytorch.py index 5d50050d03..45dcdb2903 100644 --- a/src/transformers/models/canine/convert_canine_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/canine/convert_canine_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert CANINE checkpoint.""" - import argparse from transformers import CanineConfig, CanineModel, CanineTokenizer, load_tf_weights_in_canine diff --git a/src/transformers/models/canine/modeling_canine.py b/src/transformers/models/canine/modeling_canine.py index fda0ae72e6..c48559497a 100644 --- a/src/transformers/models/canine/modeling_canine.py +++ b/src/transformers/models/canine/modeling_canine.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch CANINE model.""" - +"""PyTorch CANINE model.""" import copy import math diff --git a/src/transformers/models/chinese_clip/configuration_chinese_clip.py b/src/transformers/models/chinese_clip/configuration_chinese_clip.py index 0cd73f67f2..bedda2b71a 100644 --- a/src/transformers/models/chinese_clip/configuration_chinese_clip.py +++ b/src/transformers/models/chinese_clip/configuration_chinese_clip.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Chinese-CLIP model configuration""" +"""Chinese-CLIP model configuration""" import os from collections import OrderedDict diff --git a/src/transformers/models/chinese_clip/modeling_chinese_clip.py b/src/transformers/models/chinese_clip/modeling_chinese_clip.py index 573a39fb1c..801969c465 100644 --- a/src/transformers/models/chinese_clip/modeling_chinese_clip.py +++ b/src/transformers/models/chinese_clip/modeling_chinese_clip.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Chinese-CLIP model.""" - +"""PyTorch Chinese-CLIP model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/clap/configuration_clap.py b/src/transformers/models/clap/configuration_clap.py index 0a36402249..8a3826779f 100644 --- a/src/transformers/models/clap/configuration_clap.py +++ b/src/transformers/models/clap/configuration_clap.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" CLAP model configuration""" +"""CLAP model configuration""" import os from typing import Union diff --git a/src/transformers/models/clap/feature_extraction_clap.py b/src/transformers/models/clap/feature_extraction_clap.py index ce18fedd19..2d1f16e194 100644 --- a/src/transformers/models/clap/feature_extraction_clap.py +++ b/src/transformers/models/clap/feature_extraction_clap.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Feature extractor class for CLAP.""" - import copy from typing import Any, Dict, List, Optional, Union diff --git a/src/transformers/models/clap/modeling_clap.py b/src/transformers/models/clap/modeling_clap.py index 5b86ca657b..d97d36c154 100644 --- a/src/transformers/models/clap/modeling_clap.py +++ b/src/transformers/models/clap/modeling_clap.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch CLAP model.""" +"""PyTorch CLAP model.""" + import collections import math from dataclasses import dataclass diff --git a/src/transformers/models/clip/configuration_clip.py b/src/transformers/models/clip/configuration_clip.py index 827fe31d5b..34fcef0673 100644 --- a/src/transformers/models/clip/configuration_clip.py +++ b/src/transformers/models/clip/configuration_clip.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" CLIP model configuration""" +"""CLIP model configuration""" import os from collections import OrderedDict diff --git a/src/transformers/models/clip/modeling_clip.py b/src/transformers/models/clip/modeling_clip.py index c2fc3424a9..ca2de67176 100644 --- a/src/transformers/models/clip/modeling_clip.py +++ b/src/transformers/models/clip/modeling_clip.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch CLIP model.""" - +"""PyTorch CLIP model.""" from dataclasses import dataclass from typing import Any, Optional, Tuple, Union diff --git a/src/transformers/models/clip/modeling_tf_clip.py b/src/transformers/models/clip/modeling_tf_clip.py index 142141fdc4..b728da52c2 100644 --- a/src/transformers/models/clip/modeling_tf_clip.py +++ b/src/transformers/models/clip/modeling_tf_clip.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 CLIP model.""" - +"""TF 2.0 CLIP model.""" from __future__ import annotations diff --git a/src/transformers/models/clip/tokenization_clip_fast.py b/src/transformers/models/clip/tokenization_clip_fast.py index 6198958a03..fe5badbc54 100644 --- a/src/transformers/models/clip/tokenization_clip_fast.py +++ b/src/transformers/models/clip/tokenization_clip_fast.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for OpenAI GPT.""" - from typing import List, Optional, Tuple from tokenizers import pre_tokenizers diff --git a/src/transformers/models/clipseg/configuration_clipseg.py b/src/transformers/models/clipseg/configuration_clipseg.py index 7df10bfe8c..df15c72076 100644 --- a/src/transformers/models/clipseg/configuration_clipseg.py +++ b/src/transformers/models/clipseg/configuration_clipseg.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" CLIPSeg model configuration""" +"""CLIPSeg model configuration""" import os from typing import Union diff --git a/src/transformers/models/clipseg/modeling_clipseg.py b/src/transformers/models/clipseg/modeling_clipseg.py index 00dcecff2d..24d4b2322e 100644 --- a/src/transformers/models/clipseg/modeling_clipseg.py +++ b/src/transformers/models/clipseg/modeling_clipseg.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch CLIPSeg model.""" +"""PyTorch CLIPSeg model.""" import copy import math diff --git a/src/transformers/models/clvp/configuration_clvp.py b/src/transformers/models/clvp/configuration_clvp.py index 505238b2a8..d40ef585aa 100644 --- a/src/transformers/models/clvp/configuration_clvp.py +++ b/src/transformers/models/clvp/configuration_clvp.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" CLVP model configuration""" - +"""CLVP model configuration""" import os from typing import TYPE_CHECKING, Union diff --git a/src/transformers/models/clvp/modeling_clvp.py b/src/transformers/models/clvp/modeling_clvp.py index a36e982242..3a70d68057 100644 --- a/src/transformers/models/clvp/modeling_clvp.py +++ b/src/transformers/models/clvp/modeling_clvp.py @@ -13,8 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch CLVP model.""" - +"""PyTorch CLVP model.""" import copy import math diff --git a/src/transformers/models/clvp/number_normalizer.py b/src/transformers/models/clvp/number_normalizer.py index 86aa087e81..7824009727 100644 --- a/src/transformers/models/clvp/number_normalizer.py +++ b/src/transformers/models/clvp/number_normalizer.py @@ -15,7 +15,6 @@ """English Normalizer class for CLVP.""" - import re diff --git a/src/transformers/models/clvp/processing_clvp.py b/src/transformers/models/clvp/processing_clvp.py index 0723986db9..4e015cea1f 100644 --- a/src/transformers/models/clvp/processing_clvp.py +++ b/src/transformers/models/clvp/processing_clvp.py @@ -17,7 +17,6 @@ Processor class for CLVP """ - from ...processing_utils import ProcessorMixin diff --git a/src/transformers/models/code_llama/tokenization_code_llama.py b/src/transformers/models/code_llama/tokenization_code_llama.py index c3cbc44cea..5bbf2d0452 100644 --- a/src/transformers/models/code_llama/tokenization_code_llama.py +++ b/src/transformers/models/code_llama/tokenization_code_llama.py @@ -15,6 +15,7 @@ # limitations under the License. """Tokenization classes for Code LLaMA.""" + import os from shutil import copyfile from typing import Any, Dict, List, Optional, Tuple diff --git a/src/transformers/models/codegen/configuration_codegen.py b/src/transformers/models/codegen/configuration_codegen.py index db0008a033..cf69001480 100644 --- a/src/transformers/models/codegen/configuration_codegen.py +++ b/src/transformers/models/codegen/configuration_codegen.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" CodeGen model configuration""" +"""CodeGen model configuration""" + from collections import OrderedDict from typing import Any, List, Mapping, Optional diff --git a/src/transformers/models/codegen/modeling_codegen.py b/src/transformers/models/codegen/modeling_codegen.py index 0987bbc305..a8df9ed7f3 100644 --- a/src/transformers/models/codegen/modeling_codegen.py +++ b/src/transformers/models/codegen/modeling_codegen.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch CodeGen model.""" +"""PyTorch CodeGen model.""" from typing import Optional, Tuple, Union diff --git a/src/transformers/models/codegen/tokenization_codegen.py b/src/transformers/models/codegen/tokenization_codegen.py index 1b03af7008..f3f765d273 100644 --- a/src/transformers/models/codegen/tokenization_codegen.py +++ b/src/transformers/models/codegen/tokenization_codegen.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for CodeGen""" - import json import os from functools import lru_cache diff --git a/src/transformers/models/codegen/tokenization_codegen_fast.py b/src/transformers/models/codegen/tokenization_codegen_fast.py index b086fb84a6..9fdf2ec38e 100644 --- a/src/transformers/models/codegen/tokenization_codegen_fast.py +++ b/src/transformers/models/codegen/tokenization_codegen_fast.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for OpenAI GPT.""" - import json import re from typing import TYPE_CHECKING, List, Optional, Tuple, Union diff --git a/src/transformers/models/cohere/configuration_cohere.py b/src/transformers/models/cohere/configuration_cohere.py index f0b0e50b91..4060811260 100644 --- a/src/transformers/models/cohere/configuration_cohere.py +++ b/src/transformers/models/cohere/configuration_cohere.py @@ -17,7 +17,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Cohere model configuration""" +"""Cohere model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/conditional_detr/configuration_conditional_detr.py b/src/transformers/models/conditional_detr/configuration_conditional_detr.py index e5dc3930d3..cf7e5834b0 100644 --- a/src/transformers/models/conditional_detr/configuration_conditional_detr.py +++ b/src/transformers/models/conditional_detr/configuration_conditional_detr.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Conditional DETR model configuration""" +"""Conditional DETR model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/conditional_detr/convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/conditional_detr/convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py index b1a1b1c817..91f00668be 100644 --- a/src/transformers/models/conditional_detr/convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/conditional_detr/convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Convert Conditional DETR checkpoints.""" - import argparse import json from collections import OrderedDict diff --git a/src/transformers/models/conditional_detr/modeling_conditional_detr.py b/src/transformers/models/conditional_detr/modeling_conditional_detr.py index 7fd04b8b43..aa905d9e96 100644 --- a/src/transformers/models/conditional_detr/modeling_conditional_detr.py +++ b/src/transformers/models/conditional_detr/modeling_conditional_detr.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Conditional DETR model.""" - +"""PyTorch Conditional DETR model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/convbert/configuration_convbert.py b/src/transformers/models/convbert/configuration_convbert.py index 82d555cd3a..2c6b544568 100644 --- a/src/transformers/models/convbert/configuration_convbert.py +++ b/src/transformers/models/convbert/configuration_convbert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ConvBERT model configuration""" +"""ConvBERT model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/convbert/modeling_convbert.py b/src/transformers/models/convbert/modeling_convbert.py index dd5d06ef92..b92ff686ed 100755 --- a/src/transformers/models/convbert/modeling_convbert.py +++ b/src/transformers/models/convbert/modeling_convbert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch ConvBERT model.""" - +"""PyTorch ConvBERT model.""" import math import os @@ -81,72 +80,72 @@ def load_tf_weights_in_convbert(model, config, tf_checkpoint_path): group_dense_name = "dense" for j in range(config.num_hidden_layers): - param_mapping[ - f"encoder.layer.{j}.attention.self.query.weight" - ] = f"electra/encoder/layer_{j}/attention/self/query/kernel" - param_mapping[ - f"encoder.layer.{j}.attention.self.query.bias" - ] = f"electra/encoder/layer_{j}/attention/self/query/bias" - param_mapping[ - f"encoder.layer.{j}.attention.self.key.weight" - ] = f"electra/encoder/layer_{j}/attention/self/key/kernel" - param_mapping[ - f"encoder.layer.{j}.attention.self.key.bias" - ] = f"electra/encoder/layer_{j}/attention/self/key/bias" - param_mapping[ - f"encoder.layer.{j}.attention.self.value.weight" - ] = f"electra/encoder/layer_{j}/attention/self/value/kernel" - param_mapping[ - f"encoder.layer.{j}.attention.self.value.bias" - ] = f"electra/encoder/layer_{j}/attention/self/value/bias" - param_mapping[ - f"encoder.layer.{j}.attention.self.key_conv_attn_layer.depthwise.weight" - ] = f"electra/encoder/layer_{j}/attention/self/conv_attn_key/depthwise_kernel" - param_mapping[ - f"encoder.layer.{j}.attention.self.key_conv_attn_layer.pointwise.weight" - ] = f"electra/encoder/layer_{j}/attention/self/conv_attn_key/pointwise_kernel" - param_mapping[ - f"encoder.layer.{j}.attention.self.key_conv_attn_layer.bias" - ] = f"electra/encoder/layer_{j}/attention/self/conv_attn_key/bias" - param_mapping[ - f"encoder.layer.{j}.attention.self.conv_kernel_layer.weight" - ] = f"electra/encoder/layer_{j}/attention/self/conv_attn_kernel/kernel" - param_mapping[ - f"encoder.layer.{j}.attention.self.conv_kernel_layer.bias" - ] = f"electra/encoder/layer_{j}/attention/self/conv_attn_kernel/bias" - param_mapping[ - f"encoder.layer.{j}.attention.self.conv_out_layer.weight" - ] = f"electra/encoder/layer_{j}/attention/self/conv_attn_point/kernel" - param_mapping[ - f"encoder.layer.{j}.attention.self.conv_out_layer.bias" - ] = f"electra/encoder/layer_{j}/attention/self/conv_attn_point/bias" - param_mapping[ - f"encoder.layer.{j}.attention.output.dense.weight" - ] = f"electra/encoder/layer_{j}/attention/output/dense/kernel" - param_mapping[ - f"encoder.layer.{j}.attention.output.LayerNorm.weight" - ] = f"electra/encoder/layer_{j}/attention/output/LayerNorm/gamma" - param_mapping[ - f"encoder.layer.{j}.attention.output.dense.bias" - ] = f"electra/encoder/layer_{j}/attention/output/dense/bias" - param_mapping[ - f"encoder.layer.{j}.attention.output.LayerNorm.bias" - ] = f"electra/encoder/layer_{j}/attention/output/LayerNorm/beta" - param_mapping[ - f"encoder.layer.{j}.intermediate.dense.weight" - ] = f"electra/encoder/layer_{j}/intermediate/{group_dense_name}/kernel" - param_mapping[ - f"encoder.layer.{j}.intermediate.dense.bias" - ] = f"electra/encoder/layer_{j}/intermediate/{group_dense_name}/bias" - param_mapping[ - f"encoder.layer.{j}.output.dense.weight" - ] = f"electra/encoder/layer_{j}/output/{group_dense_name}/kernel" - param_mapping[ - f"encoder.layer.{j}.output.dense.bias" - ] = f"electra/encoder/layer_{j}/output/{group_dense_name}/bias" - param_mapping[ - f"encoder.layer.{j}.output.LayerNorm.weight" - ] = f"electra/encoder/layer_{j}/output/LayerNorm/gamma" + param_mapping[f"encoder.layer.{j}.attention.self.query.weight"] = ( + f"electra/encoder/layer_{j}/attention/self/query/kernel" + ) + param_mapping[f"encoder.layer.{j}.attention.self.query.bias"] = ( + 
f"electra/encoder/layer_{j}/attention/self/query/bias" + ) + param_mapping[f"encoder.layer.{j}.attention.self.key.weight"] = ( + f"electra/encoder/layer_{j}/attention/self/key/kernel" + ) + param_mapping[f"encoder.layer.{j}.attention.self.key.bias"] = ( + f"electra/encoder/layer_{j}/attention/self/key/bias" + ) + param_mapping[f"encoder.layer.{j}.attention.self.value.weight"] = ( + f"electra/encoder/layer_{j}/attention/self/value/kernel" + ) + param_mapping[f"encoder.layer.{j}.attention.self.value.bias"] = ( + f"electra/encoder/layer_{j}/attention/self/value/bias" + ) + param_mapping[f"encoder.layer.{j}.attention.self.key_conv_attn_layer.depthwise.weight"] = ( + f"electra/encoder/layer_{j}/attention/self/conv_attn_key/depthwise_kernel" + ) + param_mapping[f"encoder.layer.{j}.attention.self.key_conv_attn_layer.pointwise.weight"] = ( + f"electra/encoder/layer_{j}/attention/self/conv_attn_key/pointwise_kernel" + ) + param_mapping[f"encoder.layer.{j}.attention.self.key_conv_attn_layer.bias"] = ( + f"electra/encoder/layer_{j}/attention/self/conv_attn_key/bias" + ) + param_mapping[f"encoder.layer.{j}.attention.self.conv_kernel_layer.weight"] = ( + f"electra/encoder/layer_{j}/attention/self/conv_attn_kernel/kernel" + ) + param_mapping[f"encoder.layer.{j}.attention.self.conv_kernel_layer.bias"] = ( + f"electra/encoder/layer_{j}/attention/self/conv_attn_kernel/bias" + ) + param_mapping[f"encoder.layer.{j}.attention.self.conv_out_layer.weight"] = ( + f"electra/encoder/layer_{j}/attention/self/conv_attn_point/kernel" + ) + param_mapping[f"encoder.layer.{j}.attention.self.conv_out_layer.bias"] = ( + f"electra/encoder/layer_{j}/attention/self/conv_attn_point/bias" + ) + param_mapping[f"encoder.layer.{j}.attention.output.dense.weight"] = ( + f"electra/encoder/layer_{j}/attention/output/dense/kernel" + ) + param_mapping[f"encoder.layer.{j}.attention.output.LayerNorm.weight"] = ( + f"electra/encoder/layer_{j}/attention/output/LayerNorm/gamma" + ) + param_mapping[f"encoder.layer.{j}.attention.output.dense.bias"] = ( + f"electra/encoder/layer_{j}/attention/output/dense/bias" + ) + param_mapping[f"encoder.layer.{j}.attention.output.LayerNorm.bias"] = ( + f"electra/encoder/layer_{j}/attention/output/LayerNorm/beta" + ) + param_mapping[f"encoder.layer.{j}.intermediate.dense.weight"] = ( + f"electra/encoder/layer_{j}/intermediate/{group_dense_name}/kernel" + ) + param_mapping[f"encoder.layer.{j}.intermediate.dense.bias"] = ( + f"electra/encoder/layer_{j}/intermediate/{group_dense_name}/bias" + ) + param_mapping[f"encoder.layer.{j}.output.dense.weight"] = ( + f"electra/encoder/layer_{j}/output/{group_dense_name}/kernel" + ) + param_mapping[f"encoder.layer.{j}.output.dense.bias"] = ( + f"electra/encoder/layer_{j}/output/{group_dense_name}/bias" + ) + param_mapping[f"encoder.layer.{j}.output.LayerNorm.weight"] = ( + f"electra/encoder/layer_{j}/output/LayerNorm/gamma" + ) param_mapping[f"encoder.layer.{j}.output.LayerNorm.bias"] = f"electra/encoder/layer_{j}/output/LayerNorm/beta" for param in model.named_parameters(): diff --git a/src/transformers/models/convbert/modeling_tf_convbert.py b/src/transformers/models/convbert/modeling_tf_convbert.py index a8ac11a8cd..95be5a56e1 100644 --- a/src/transformers/models/convbert/modeling_tf_convbert.py +++ b/src/transformers/models/convbert/modeling_tf_convbert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" TF 2.0 ConvBERT model.""" - +"""TF 2.0 ConvBERT model.""" from __future__ import annotations diff --git a/src/transformers/models/convbert/tokenization_convbert.py b/src/transformers/models/convbert/tokenization_convbert.py index c0fe2c0183..f1bc98bf41 100644 --- a/src/transformers/models/convbert/tokenization_convbert.py +++ b/src/transformers/models/convbert/tokenization_convbert.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for ConvBERT.""" + import collections import os import unicodedata diff --git a/src/transformers/models/convbert/tokenization_convbert_fast.py b/src/transformers/models/convbert/tokenization_convbert_fast.py index 65bedb73fe..e9c47c2b04 100644 --- a/src/transformers/models/convbert/tokenization_convbert_fast.py +++ b/src/transformers/models/convbert/tokenization_convbert_fast.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for ConvBERT.""" + import json from typing import List, Optional, Tuple diff --git a/src/transformers/models/convnext/configuration_convnext.py b/src/transformers/models/convnext/configuration_convnext.py index 2549f06b99..291faa4e1a 100644 --- a/src/transformers/models/convnext/configuration_convnext.py +++ b/src/transformers/models/convnext/configuration_convnext.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ConvNeXT model configuration""" +"""ConvNeXT model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/convnext/convert_convnext_to_pytorch.py b/src/transformers/models/convnext/convert_convnext_to_pytorch.py index cdcbf24d55..27315ed73f 100644 --- a/src/transformers/models/convnext/convert_convnext_to_pytorch.py +++ b/src/transformers/models/convnext/convert_convnext_to_pytorch.py @@ -16,7 +16,6 @@ URL: https://github.com/facebookresearch/ConvNeXt""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/convnext/modeling_convnext.py b/src/transformers/models/convnext/modeling_convnext.py index 19eb1b599c..a0deaf96d5 100755 --- a/src/transformers/models/convnext/modeling_convnext.py +++ b/src/transformers/models/convnext/modeling_convnext.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ConvNext model.""" - +"""PyTorch ConvNext model.""" from typing import Optional, Tuple, Union diff --git a/src/transformers/models/convnext/modeling_tf_convnext.py b/src/transformers/models/convnext/modeling_tf_convnext.py index b92ac446d9..0e348a838a 100644 --- a/src/transformers/models/convnext/modeling_tf_convnext.py +++ b/src/transformers/models/convnext/modeling_tf_convnext.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" TF 2.0 ConvNext model.""" - +"""TF 2.0 ConvNext model.""" from __future__ import annotations diff --git a/src/transformers/models/convnextv2/configuration_convnextv2.py b/src/transformers/models/convnextv2/configuration_convnextv2.py index e7692250b2..6d5b82b531 100644 --- a/src/transformers/models/convnextv2/configuration_convnextv2.py +++ b/src/transformers/models/convnextv2/configuration_convnextv2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ConvNeXTV2 model configuration""" - +"""ConvNeXTV2 model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/convnextv2/modeling_convnextv2.py b/src/transformers/models/convnextv2/modeling_convnextv2.py index 3f965f9c2c..df13a5ea6b 100644 --- a/src/transformers/models/convnextv2/modeling_convnextv2.py +++ b/src/transformers/models/convnextv2/modeling_convnextv2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ConvNextV2 model.""" - +"""PyTorch ConvNextV2 model.""" from typing import Optional, Tuple, Union diff --git a/src/transformers/models/convnextv2/modeling_tf_convnextv2.py b/src/transformers/models/convnextv2/modeling_tf_convnextv2.py index 0debe6fd0c..e39aee5159 100644 --- a/src/transformers/models/convnextv2/modeling_tf_convnextv2.py +++ b/src/transformers/models/convnextv2/modeling_tf_convnextv2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 ConvNextV2 model.""" - +"""TF 2.0 ConvNextV2 model.""" from __future__ import annotations diff --git a/src/transformers/models/cpm/tokenization_cpm.py b/src/transformers/models/cpm/tokenization_cpm.py index ac454898b5..c92afb7eb6 100644 --- a/src/transformers/models/cpm/tokenization_cpm.py +++ b/src/transformers/models/cpm/tokenization_cpm.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes.""" + import os import unicodedata from shutil import copyfile diff --git a/src/transformers/models/cpm/tokenization_cpm_fast.py b/src/transformers/models/cpm/tokenization_cpm_fast.py index 9b7b6da118..3dcf624843 100644 --- a/src/transformers/models/cpm/tokenization_cpm_fast.py +++ b/src/transformers/models/cpm/tokenization_cpm_fast.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes.""" + import os from shutil import copyfile from typing import List, Optional, Tuple diff --git a/src/transformers/models/cpmant/configuration_cpmant.py b/src/transformers/models/cpmant/configuration_cpmant.py index 4c2a880866..155811913a 100644 --- a/src/transformers/models/cpmant/configuration_cpmant.py +++ b/src/transformers/models/cpmant/configuration_cpmant.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" CPMAnt model configuration""" +"""CPMAnt model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/cpmant/modeling_cpmant.py b/src/transformers/models/cpmant/modeling_cpmant.py index 9882d4ccc6..c8a3135052 100755 --- a/src/transformers/models/cpmant/modeling_cpmant.py +++ b/src/transformers/models/cpmant/modeling_cpmant.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch CPMAnt""" - +"""PyTorch CPMAnt""" import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/cpmant/tokenization_cpmant.py b/src/transformers/models/cpmant/tokenization_cpmant.py index a5e66c7679..2ccb296c70 100644 --- a/src/transformers/models/cpmant/tokenization_cpmant.py +++ b/src/transformers/models/cpmant/tokenization_cpmant.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for CPMAnt.""" + import collections import os from typing import List, Optional, Tuple diff --git a/src/transformers/models/ctrl/configuration_ctrl.py b/src/transformers/models/ctrl/configuration_ctrl.py index 8fd01c10b5..adea61cd67 100644 --- a/src/transformers/models/ctrl/configuration_ctrl.py +++ b/src/transformers/models/ctrl/configuration_ctrl.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Salesforce CTRL configuration""" +"""Salesforce CTRL configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/ctrl/modeling_ctrl.py b/src/transformers/models/ctrl/modeling_ctrl.py index c0376f673a..d84c8bb37c 100644 --- a/src/transformers/models/ctrl/modeling_ctrl.py +++ b/src/transformers/models/ctrl/modeling_ctrl.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch CTRL model.""" +"""PyTorch CTRL model.""" from typing import Optional, Tuple, Union diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py index 86808190c8..1621cc17ca 100644 --- a/src/transformers/models/ctrl/modeling_tf_ctrl.py +++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 CTRL model.""" +"""TF 2.0 CTRL model.""" from __future__ import annotations diff --git a/src/transformers/models/ctrl/tokenization_ctrl.py b/src/transformers/models/ctrl/tokenization_ctrl.py index fdae22d2c3..5305f2b231 100644 --- a/src/transformers/models/ctrl/tokenization_ctrl.py +++ b/src/transformers/models/ctrl/tokenization_ctrl.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Tokenization classes for Salesforce CTRL.""" - import json import os from typing import Optional, Tuple diff --git a/src/transformers/models/cvt/configuration_cvt.py b/src/transformers/models/cvt/configuration_cvt.py index e8c50fbf77..a966701cee 100644 --- a/src/transformers/models/cvt/configuration_cvt.py +++ b/src/transformers/models/cvt/configuration_cvt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" CvT model configuration""" +"""CvT model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/cvt/convert_cvt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/cvt/convert_cvt_original_pytorch_checkpoint_to_pytorch.py index ea4edac16c..f014365140 100644 --- a/src/transformers/models/cvt/convert_cvt_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/cvt/convert_cvt_original_pytorch_checkpoint_to_pytorch.py @@ -16,7 +16,6 @@ URL: https://github.com/microsoft/CvT""" - import argparse import json from collections import OrderedDict diff --git a/src/transformers/models/cvt/modeling_cvt.py b/src/transformers/models/cvt/modeling_cvt.py index 23728c770c..7963824444 100644 --- a/src/transformers/models/cvt/modeling_cvt.py +++ b/src/transformers/models/cvt/modeling_cvt.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch CvT model.""" - +"""PyTorch CvT model.""" import collections.abc from dataclasses import dataclass diff --git a/src/transformers/models/cvt/modeling_tf_cvt.py b/src/transformers/models/cvt/modeling_tf_cvt.py index 03df5033b0..617fc99733 100644 --- a/src/transformers/models/cvt/modeling_tf_cvt.py +++ b/src/transformers/models/cvt/modeling_tf_cvt.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 Cvt model.""" - +"""TF 2.0 Cvt model.""" from __future__ import annotations diff --git a/src/transformers/models/data2vec/configuration_data2vec_audio.py b/src/transformers/models/data2vec/configuration_data2vec_audio.py index 32d505f157..54754a8c79 100644 --- a/src/transformers/models/data2vec/configuration_data2vec_audio.py +++ b/src/transformers/models/data2vec/configuration_data2vec_audio.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Data2VecText configuration""" +"""Data2VecText configuration""" import math diff --git a/src/transformers/models/data2vec/configuration_data2vec_text.py b/src/transformers/models/data2vec/configuration_data2vec_text.py index e7b15270ed..6cd7b80c30 100644 --- a/src/transformers/models/data2vec/configuration_data2vec_text.py +++ b/src/transformers/models/data2vec/configuration_data2vec_text.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Data2VecText configuration""" +"""Data2VecText configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/data2vec/configuration_data2vec_vision.py b/src/transformers/models/data2vec/configuration_data2vec_vision.py index 315f24a557..d63a564cec 100644 --- a/src/transformers/models/data2vec/configuration_data2vec_vision.py +++ b/src/transformers/models/data2vec/configuration_data2vec_vision.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Data2VecVision model configuration""" +"""Data2VecVision model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py index 01c2d8cab2..b5a30223bc 100644 --- a/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Wav2Vec2 checkpoint.""" - import argparse import os from functools import reduce diff --git a/src/transformers/models/data2vec/convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/data2vec/convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py index 81f5cd23fb..10b97dc93d 100644 --- a/src/transformers/models/data2vec/convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/data2vec/convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert data2vec checkpoint.""" - import argparse import os import pathlib diff --git a/src/transformers/models/data2vec/modeling_data2vec_audio.py b/src/transformers/models/data2vec/modeling_data2vec_audio.py index 8837c278c5..8be8b5ea8f 100755 --- a/src/transformers/models/data2vec/modeling_data2vec_audio.py +++ b/src/transformers/models/data2vec/modeling_data2vec_audio.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Data2VecAudio model.""" +"""PyTorch Data2VecAudio model.""" import math import warnings diff --git a/src/transformers/models/data2vec/modeling_data2vec_vision.py b/src/transformers/models/data2vec/modeling_data2vec_vision.py index 12d62b78a0..03b8170e67 100644 --- a/src/transformers/models/data2vec/modeling_data2vec_vision.py +++ b/src/transformers/models/data2vec/modeling_data2vec_vision.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch Data2VecVision model.""" - +"""PyTorch Data2VecVision model.""" import collections.abc import math diff --git a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py index e65a61fae5..3939afe570 100644 --- a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py +++ b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 Data2Vec Vision model.""" - +"""TF 2.0 Data2Vec Vision model.""" from __future__ import annotations diff --git a/src/transformers/models/dbrx/configuration_dbrx.py b/src/transformers/models/dbrx/configuration_dbrx.py index b03d2c17b0..91f4fc3a4b 100644 --- a/src/transformers/models/dbrx/configuration_dbrx.py +++ b/src/transformers/models/dbrx/configuration_dbrx.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" DBRX model configuration """ +"""DBRX model configuration""" from typing import Any, Optional diff --git a/src/transformers/models/dbrx/modeling_dbrx.py b/src/transformers/models/dbrx/modeling_dbrx.py index f483f79cd5..850e3a3f81 100644 --- a/src/transformers/models/dbrx/modeling_dbrx.py +++ b/src/transformers/models/dbrx/modeling_dbrx.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch DBRX model. """ +"""PyTorch DBRX model.""" import math from typing import Any, Optional, Tuple, Union diff --git a/src/transformers/models/deberta/configuration_deberta.py b/src/transformers/models/deberta/configuration_deberta.py index e79e7238ab..59b59764c3 100644 --- a/src/transformers/models/deberta/configuration_deberta.py +++ b/src/transformers/models/deberta/configuration_deberta.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" DeBERTa model configuration""" +"""DeBERTa model configuration""" + from collections import OrderedDict from typing import TYPE_CHECKING, Any, Mapping, Optional, Union diff --git a/src/transformers/models/deberta/modeling_deberta.py b/src/transformers/models/deberta/modeling_deberta.py index 03646b95aa..964e3add91 100644 --- a/src/transformers/models/deberta/modeling_deberta.py +++ b/src/transformers/models/deberta/modeling_deberta.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch DeBERTa model.""" +"""PyTorch DeBERTa model.""" from collections.abc import Sequence from typing import Optional, Tuple, Union diff --git a/src/transformers/models/deberta/modeling_tf_deberta.py b/src/transformers/models/deberta/modeling_tf_deberta.py index 774d6296d0..6762c69ec5 100644 --- a/src/transformers/models/deberta/modeling_tf_deberta.py +++ b/src/transformers/models/deberta/modeling_tf_deberta.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 DeBERTa model.""" - +"""TF 2.0 DeBERTa model.""" from __future__ import annotations diff --git a/src/transformers/models/deberta/tokenization_deberta.py b/src/transformers/models/deberta/tokenization_deberta.py index b846a78915..371aa98662 100644 --- a/src/transformers/models/deberta/tokenization_deberta.py +++ b/src/transformers/models/deberta/tokenization_deberta.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for model DeBERTa.""" +"""Tokenization class for model DeBERTa.""" import json import os diff --git a/src/transformers/models/deberta/tokenization_deberta_fast.py b/src/transformers/models/deberta/tokenization_deberta_fast.py index 07226443d3..b28732850b 100644 --- a/src/transformers/models/deberta/tokenization_deberta_fast.py +++ b/src/transformers/models/deberta/tokenization_deberta_fast.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Fast Tokenization class for model DeBERTa.""" +"""Fast Tokenization class for model DeBERTa.""" import json from typing import List, Optional, Tuple diff --git a/src/transformers/models/deberta_v2/configuration_deberta_v2.py b/src/transformers/models/deberta_v2/configuration_deberta_v2.py index 78fdd91c81..83745980fb 100644 --- a/src/transformers/models/deberta_v2/configuration_deberta_v2.py +++ b/src/transformers/models/deberta_v2/configuration_deberta_v2.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" DeBERTa-v2 model configuration""" +"""DeBERTa-v2 model configuration""" + from collections import OrderedDict from typing import TYPE_CHECKING, Any, Mapping, Optional, Union diff --git a/src/transformers/models/deberta_v2/modeling_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_deberta_v2.py index 572ee9196f..fd910e9daf 100644 --- a/src/transformers/models/deberta_v2/modeling_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_deberta_v2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch DeBERTa-v2 model.""" +"""PyTorch DeBERTa-v2 model.""" from collections.abc import Sequence from typing import Optional, Tuple, Union diff --git a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py index 9bd6258100..15ab6da158 100644 --- a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" TF 2.0 DeBERTa-v2 model.""" +"""TF 2.0 DeBERTa-v2 model.""" from __future__ import annotations diff --git a/src/transformers/models/deberta_v2/tokenization_deberta_v2.py b/src/transformers/models/deberta_v2/tokenization_deberta_v2.py index a921039454..2876ac7660 100644 --- a/src/transformers/models/deberta_v2/tokenization_deberta_v2.py +++ b/src/transformers/models/deberta_v2/tokenization_deberta_v2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for model DeBERTa.""" +"""Tokenization class for model DeBERTa.""" import os import unicodedata diff --git a/src/transformers/models/decision_transformer/configuration_decision_transformer.py b/src/transformers/models/decision_transformer/configuration_decision_transformer.py index 6f1fb500ba..19e89afecb 100644 --- a/src/transformers/models/decision_transformer/configuration_decision_transformer.py +++ b/src/transformers/models/decision_transformer/configuration_decision_transformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Decision Transformer model configuration""" +"""Decision Transformer model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/decision_transformer/modeling_decision_transformer.py b/src/transformers/models/decision_transformer/modeling_decision_transformer.py index be7928995c..236efb1d22 100755 --- a/src/transformers/models/decision_transformer/modeling_decision_transformer.py +++ b/src/transformers/models/decision_transformer/modeling_decision_transformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch DecisionTransformer model.""" +"""PyTorch DecisionTransformer model.""" import math import os diff --git a/src/transformers/models/deformable_detr/configuration_deformable_detr.py b/src/transformers/models/deformable_detr/configuration_deformable_detr.py index b623d3504a..d888f6e6f4 100644 --- a/src/transformers/models/deformable_detr/configuration_deformable_detr.py +++ b/src/transformers/models/deformable_detr/configuration_deformable_detr.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Deformable DETR model configuration""" +"""Deformable DETR model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py b/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py index 928fa368ed..b637ba6d84 100644 --- a/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py +++ b/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Convert Deformable DETR checkpoints.""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/deformable_detr/load_custom.py b/src/transformers/models/deformable_detr/load_custom.py index c3a822e276..3c0b3a432b 100644 --- a/src/transformers/models/deformable_detr/load_custom.py +++ b/src/transformers/models/deformable_detr/load_custom.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Loading of Deformable DETR's CUDA kernels""" +"""Loading of Deformable DETR's CUDA kernels""" + import os from pathlib import Path diff --git a/src/transformers/models/deformable_detr/modeling_deformable_detr.py b/src/transformers/models/deformable_detr/modeling_deformable_detr.py index b77ae4f8f8..f619575bd8 100755 --- a/src/transformers/models/deformable_detr/modeling_deformable_detr.py +++ b/src/transformers/models/deformable_detr/modeling_deformable_detr.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Deformable DETR model.""" - +"""PyTorch Deformable DETR model.""" import copy import math diff --git a/src/transformers/models/deit/configuration_deit.py b/src/transformers/models/deit/configuration_deit.py index e1767c35fd..3784ed76ab 100644 --- a/src/transformers/models/deit/configuration_deit.py +++ b/src/transformers/models/deit/configuration_deit.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" DeiT model configuration""" +"""DeiT model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/deit/convert_deit_timm_to_pytorch.py b/src/transformers/models/deit/convert_deit_timm_to_pytorch.py index 2b5c795ff2..e7bf3e7a12 100644 --- a/src/transformers/models/deit/convert_deit_timm_to_pytorch.py +++ b/src/transformers/models/deit/convert_deit_timm_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert DeiT distilled checkpoints from the timm library.""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/deit/modeling_deit.py b/src/transformers/models/deit/modeling_deit.py index fe811ecc4a..c9e54d3b87 100644 --- a/src/transformers/models/deit/modeling_deit.py +++ b/src/transformers/models/deit/modeling_deit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch DeiT model.""" - +"""PyTorch DeiT model.""" import collections.abc import math diff --git a/src/transformers/models/deit/modeling_tf_deit.py b/src/transformers/models/deit/modeling_tf_deit.py index 43a3465ba1..e5faff2a4a 100644 --- a/src/transformers/models/deit/modeling_tf_deit.py +++ b/src/transformers/models/deit/modeling_tf_deit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" TensorFlow DeiT model.""" - +"""TensorFlow DeiT model.""" from __future__ import annotations diff --git a/src/transformers/models/deprecated/bort/convert_bort_original_gluonnlp_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/bort/convert_bort_original_gluonnlp_checkpoint_to_pytorch.py index 5dc9a244c4..e2f64e9c3c 100644 --- a/src/transformers/models/deprecated/bort/convert_bort_original_gluonnlp_checkpoint_to_pytorch.py +++ b/src/transformers/models/deprecated/bort/convert_bort_original_gluonnlp_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Bort checkpoint.""" - import argparse import os diff --git a/src/transformers/models/deprecated/mctct/modeling_mctct.py b/src/transformers/models/deprecated/mctct/modeling_mctct.py index 95c860fa9a..931495611a 100755 --- a/src/transformers/models/deprecated/mctct/modeling_mctct.py +++ b/src/transformers/models/deprecated/mctct/modeling_mctct.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch M-CTC-T model.""" - +"""PyTorch M-CTC-T model.""" import math from typing import Optional, Tuple, Union diff --git a/src/transformers/models/deprecated/mctct/processing_mctct.py b/src/transformers/models/deprecated/mctct/processing_mctct.py index 4e0cbe27dd..e2201c0ed5 100644 --- a/src/transformers/models/deprecated/mctct/processing_mctct.py +++ b/src/transformers/models/deprecated/mctct/processing_mctct.py @@ -15,6 +15,7 @@ """ Speech processor class for M-CTC-T """ + import warnings from contextlib import contextmanager diff --git a/src/transformers/models/deprecated/mmbt/configuration_mmbt.py b/src/transformers/models/deprecated/mmbt/configuration_mmbt.py index df5161b092..8fcc0f1d63 100644 --- a/src/transformers/models/deprecated/mmbt/configuration_mmbt.py +++ b/src/transformers/models/deprecated/mmbt/configuration_mmbt.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MMBT configuration""" +"""MMBT configuration""" from ....utils import logging diff --git a/src/transformers/models/deprecated/mmbt/modeling_mmbt.py b/src/transformers/models/deprecated/mmbt/modeling_mmbt.py index 8dc450ce8f..4a06de5698 100644 --- a/src/transformers/models/deprecated/mmbt/modeling_mmbt.py +++ b/src/transformers/models/deprecated/mmbt/modeling_mmbt.py @@ -15,7 +15,6 @@ # limitations under the License. """PyTorch MMBT model.""" - import torch from torch import nn from torch.nn import CrossEntropyLoss, MSELoss diff --git a/src/transformers/models/deprecated/open_llama/configuration_open_llama.py b/src/transformers/models/deprecated/open_llama/configuration_open_llama.py index ae2add5a5f..259fd19367 100644 --- a/src/transformers/models/deprecated/open_llama/configuration_open_llama.py +++ b/src/transformers/models/deprecated/open_llama/configuration_open_llama.py @@ -17,7 +17,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Open-Llama model configuration""" +"""Open-Llama model configuration""" from ....configuration_utils import PretrainedConfig from ....utils import logging diff --git a/src/transformers/models/deprecated/open_llama/modeling_open_llama.py b/src/transformers/models/deprecated/open_llama/modeling_open_llama.py index 4e42d716e8..e748529c9e 100644 --- a/src/transformers/models/deprecated/open_llama/modeling_open_llama.py +++ b/src/transformers/models/deprecated/open_llama/modeling_open_llama.py @@ -17,7 +17,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Open-Llama model.""" +"""PyTorch Open-Llama model.""" + import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/deprecated/retribert/configuration_retribert.py b/src/transformers/models/deprecated/retribert/configuration_retribert.py index dfa7d3b65b..f154bb04c6 100644 --- a/src/transformers/models/deprecated/retribert/configuration_retribert.py +++ b/src/transformers/models/deprecated/retribert/configuration_retribert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" RetriBERT model configuration""" +"""RetriBERT model configuration""" from ....configuration_utils import PretrainedConfig from ....utils import logging diff --git a/src/transformers/models/deprecated/retribert/modeling_retribert.py b/src/transformers/models/deprecated/retribert/modeling_retribert.py index d8af23fb49..3af3f7be49 100644 --- a/src/transformers/models/deprecated/retribert/modeling_retribert.py +++ b/src/transformers/models/deprecated/retribert/modeling_retribert.py @@ -16,7 +16,6 @@ RetriBERT model """ - import math from typing import Optional diff --git a/src/transformers/models/deprecated/trajectory_transformer/configuration_trajectory_transformer.py b/src/transformers/models/deprecated/trajectory_transformer/configuration_trajectory_transformer.py index 06ec12161f..6ce86dfb7a 100644 --- a/src/transformers/models/deprecated/trajectory_transformer/configuration_trajectory_transformer.py +++ b/src/transformers/models/deprecated/trajectory_transformer/configuration_trajectory_transformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TrajectoryTransformer model configuration""" +"""TrajectoryTransformer model configuration""" from ....configuration_utils import PretrainedConfig from ....utils import logging diff --git a/src/transformers/models/deprecated/trajectory_transformer/convert_trajectory_transformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/trajectory_transformer/convert_trajectory_transformer_original_pytorch_checkpoint_to_pytorch.py index 622552fa78..da7f780667 100644 --- a/src/transformers/models/deprecated/trajectory_transformer/convert_trajectory_transformer_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/deprecated/trajectory_transformer/convert_trajectory_transformer_original_pytorch_checkpoint_to_pytorch.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" TrajectoryTransformer pytorch checkpoint conversion""" +"""TrajectoryTransformer pytorch checkpoint conversion""" import torch import trajectory.utils as utils diff --git a/src/transformers/models/deprecated/trajectory_transformer/modeling_trajectory_transformer.py b/src/transformers/models/deprecated/trajectory_transformer/modeling_trajectory_transformer.py index 24e1815218..5bb787b87d 100644 --- a/src/transformers/models/deprecated/trajectory_transformer/modeling_trajectory_transformer.py +++ b/src/transformers/models/deprecated/trajectory_transformer/modeling_trajectory_transformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch TrajectoryTransformer model.""" +"""PyTorch TrajectoryTransformer model.""" import math import os diff --git a/src/transformers/models/deprecated/transfo_xl/configuration_transfo_xl.py b/src/transformers/models/deprecated/transfo_xl/configuration_transfo_xl.py index c9dde2e8fd..5cd031649f 100644 --- a/src/transformers/models/deprecated/transfo_xl/configuration_transfo_xl.py +++ b/src/transformers/models/deprecated/transfo_xl/configuration_transfo_xl.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Transformer XL configuration""" +"""Transformer XL configuration""" from ....configuration_utils import PretrainedConfig from ....utils import logging diff --git a/src/transformers/models/deprecated/transfo_xl/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/transfo_xl/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py index d2693ac333..cf191f797f 100644 --- a/src/transformers/models/deprecated/transfo_xl/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/deprecated/transfo_xl/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Transformer XL checkpoint and datasets.""" - import argparse import os import pickle diff --git a/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl.py index e490eb8dba..2eb3fe4893 100644 --- a/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - TF 2.0 Transformer XL model. +TF 2.0 Transformer XL model. """ from __future__ import annotations diff --git a/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl_utilities.py b/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl_utilities.py index ed1488d559..48205e06fb 100644 --- a/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl_utilities.py +++ b/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl_utilities.py @@ -14,10 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - A TF 2.0 Adaptive Softmax for Transformer XL model. +A TF 2.0 Adaptive Softmax for Transformer XL model. 
""" - import tensorflow as tf from ....modeling_tf_utils import keras diff --git a/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py b/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py index 52989cfd13..84b60dbf6d 100644 --- a/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py +++ b/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py @@ -14,9 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - PyTorch Transformer XL model. Adapted from https://github.com/kimiyoung/transformer-xl. In particular - https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/mem_transformer.py +PyTorch Transformer XL model. Adapted from https://github.com/kimiyoung/transformer-xl. In particular +https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/mem_transformer.py """ + import warnings from dataclasses import dataclass from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl_utilities.py b/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl_utilities.py index addf2a0837..f76f3ccc62 100644 --- a/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl_utilities.py +++ b/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl_utilities.py @@ -14,10 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - Utilities for PyTorch Transformer XL model. Directly adapted from https://github.com/kimiyoung/transformer-xl. +Utilities for PyTorch Transformer XL model. Directly adapted from https://github.com/kimiyoung/transformer-xl. """ - import torch from torch import nn diff --git a/src/transformers/models/deprecated/transfo_xl/tokenization_transfo_xl.py b/src/transformers/models/deprecated/transfo_xl/tokenization_transfo_xl.py index 7290a7a83b..4229e8e5b3 100644 --- a/src/transformers/models/deprecated/transfo_xl/tokenization_transfo_xl.py +++ b/src/transformers/models/deprecated/transfo_xl/tokenization_transfo_xl.py @@ -14,10 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - Tokenization classes for Transformer XL model. Adapted from https://github.com/kimiyoung/transformer-xl. +Tokenization classes for Transformer XL model. Adapted from https://github.com/kimiyoung/transformer-xl. """ - import glob import os import pickle diff --git a/src/transformers/models/deprecated/van/configuration_van.py b/src/transformers/models/deprecated/van/configuration_van.py index 68a139ffdf..2935e631e0 100644 --- a/src/transformers/models/deprecated/van/configuration_van.py +++ b/src/transformers/models/deprecated/van/configuration_van.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" VAN model configuration""" +"""VAN model configuration""" from ....configuration_utils import PretrainedConfig from ....utils import logging diff --git a/src/transformers/models/deprecated/van/convert_van_to_pytorch.py b/src/transformers/models/deprecated/van/convert_van_to_pytorch.py index 20492e42be..51466e77ba 100644 --- a/src/transformers/models/deprecated/van/convert_van_to_pytorch.py +++ b/src/transformers/models/deprecated/van/convert_van_to_pytorch.py @@ -16,7 +16,6 @@ URL: https://github.com/Visual-Attention-Network/VAN-Classification""" - import argparse import json import sys diff --git a/src/transformers/models/deprecated/van/modeling_van.py b/src/transformers/models/deprecated/van/modeling_van.py index 0f5940707c..1b26d8892b 100644 --- a/src/transformers/models/deprecated/van/modeling_van.py +++ b/src/transformers/models/deprecated/van/modeling_van.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Visual Attention Network (VAN) model.""" +"""PyTorch Visual Attention Network (VAN) model.""" import math from collections import OrderedDict diff --git a/src/transformers/models/depth_anything/configuration_depth_anything.py b/src/transformers/models/depth_anything/configuration_depth_anything.py index b6d6f388b0..9f38434abc 100644 --- a/src/transformers/models/depth_anything/configuration_depth_anything.py +++ b/src/transformers/models/depth_anything/configuration_depth_anything.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" DepthAnything model configuration""" +"""DepthAnything model configuration""" import copy diff --git a/src/transformers/models/depth_anything/convert_depth_anything_to_hf.py b/src/transformers/models/depth_anything/convert_depth_anything_to_hf.py index 022a66c0d6..9b9836e852 100644 --- a/src/transformers/models/depth_anything/convert_depth_anything_to_hf.py +++ b/src/transformers/models/depth_anything/convert_depth_anything_to_hf.py @@ -15,7 +15,6 @@ """Convert Depth Anything checkpoints from the original repository. URL: https://github.com/LiheYoung/Depth-Anything""" - import argparse from pathlib import Path diff --git a/src/transformers/models/depth_anything/modeling_depth_anything.py b/src/transformers/models/depth_anything/modeling_depth_anything.py index 9780278a6f..3fb2b32f79 100644 --- a/src/transformers/models/depth_anything/modeling_depth_anything.py +++ b/src/transformers/models/depth_anything/modeling_depth_anything.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Depth Anything model.""" - +"""PyTorch Depth Anything model.""" from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/deta/configuration_deta.py b/src/transformers/models/deta/configuration_deta.py index b876e843c0..d7fe7eadc7 100644 --- a/src/transformers/models/deta/configuration_deta.py +++ b/src/transformers/models/deta/configuration_deta.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" DETA model configuration""" - +"""DETA model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/deta/convert_deta_resnet_to_pytorch.py b/src/transformers/models/deta/convert_deta_resnet_to_pytorch.py index cc17568bd6..870c56f838 100644 --- a/src/transformers/models/deta/convert_deta_resnet_to_pytorch.py +++ b/src/transformers/models/deta/convert_deta_resnet_to_pytorch.py @@ -16,7 +16,6 @@ URL: https://github.com/jozhang97/DETA/tree/master""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/deta/convert_deta_swin_to_pytorch.py b/src/transformers/models/deta/convert_deta_swin_to_pytorch.py index 911bc434e1..67052edce1 100644 --- a/src/transformers/models/deta/convert_deta_swin_to_pytorch.py +++ b/src/transformers/models/deta/convert_deta_swin_to_pytorch.py @@ -16,7 +16,6 @@ URL: https://github.com/jozhang97/DETA/tree/master""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/deta/modeling_deta.py b/src/transformers/models/deta/modeling_deta.py index bd2faad271..fcd8fd82b6 100644 --- a/src/transformers/models/deta/modeling_deta.py +++ b/src/transformers/models/deta/modeling_deta.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch DETA model.""" - +"""PyTorch DETA model.""" import copy import math diff --git a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index 094859bda5..5e8c3b1fd8 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" DETR model configuration""" +"""DETR model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/detr/convert_detr_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/detr/convert_detr_original_pytorch_checkpoint_to_pytorch.py index 72de2be870..ba98514501 100644 --- a/src/transformers/models/detr/convert_detr_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/detr/convert_detr_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert DETR checkpoints with timm backbone.""" - import argparse import json from collections import OrderedDict diff --git a/src/transformers/models/detr/convert_detr_to_pytorch.py b/src/transformers/models/detr/convert_detr_to_pytorch.py index a52e592b94..6ba6a0e292 100644 --- a/src/transformers/models/detr/convert_detr_to_pytorch.py +++ b/src/transformers/models/detr/convert_detr_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert DETR checkpoints with native (Transformers) backbone.""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index 37da6b6ee5..ff8b1416b0 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch DETR model.""" - +"""PyTorch DETR model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/dinat/configuration_dinat.py b/src/transformers/models/dinat/configuration_dinat.py index 6138e8072b..220561152b 100644 --- a/src/transformers/models/dinat/configuration_dinat.py +++ b/src/transformers/models/dinat/configuration_dinat.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Dilated Neighborhood Attention Transformer model configuration""" +"""Dilated Neighborhood Attention Transformer model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/dinat/modeling_dinat.py b/src/transformers/models/dinat/modeling_dinat.py index 03c0fe33a2..95d6a60183 100644 --- a/src/transformers/models/dinat/modeling_dinat.py +++ b/src/transformers/models/dinat/modeling_dinat.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Dilated Neighborhood Attention Transformer model.""" - +"""PyTorch Dilated Neighborhood Attention Transformer model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/dinov2/configuration_dinov2.py b/src/transformers/models/dinov2/configuration_dinov2.py index 48feba23b1..2df883de16 100644 --- a/src/transformers/models/dinov2/configuration_dinov2.py +++ b/src/transformers/models/dinov2/configuration_dinov2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" DINOv2 model configuration""" +"""DINOv2 model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/dinov2/convert_dinov2_to_hf.py b/src/transformers/models/dinov2/convert_dinov2_to_hf.py index dd5871e6c4..5583413eb7 100644 --- a/src/transformers/models/dinov2/convert_dinov2_to_hf.py +++ b/src/transformers/models/dinov2/convert_dinov2_to_hf.py @@ -17,7 +17,6 @@ URL: https://github.com/facebookresearch/dinov2/tree/main """ - import argparse import json from pathlib import Path diff --git a/src/transformers/models/dinov2/modeling_dinov2.py b/src/transformers/models/dinov2/modeling_dinov2.py index 505c1b5c06..3a7959c27d 100644 --- a/src/transformers/models/dinov2/modeling_dinov2.py +++ b/src/transformers/models/dinov2/modeling_dinov2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch DINOv2 model.""" - +"""PyTorch DINOv2 model.""" import collections.abc import math diff --git a/src/transformers/models/distilbert/configuration_distilbert.py b/src/transformers/models/distilbert/configuration_distilbert.py index 45e2d3b3f0..a2ce1a2419 100644 --- a/src/transformers/models/distilbert/configuration_distilbert.py +++ b/src/transformers/models/distilbert/configuration_distilbert.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" DistilBERT model configuration""" +"""DistilBERT model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/distilbert/modeling_distilbert.py b/src/transformers/models/distilbert/modeling_distilbert.py index e8357682ff..8c65a4b215 100755 --- a/src/transformers/models/distilbert/modeling_distilbert.py +++ b/src/transformers/models/distilbert/modeling_distilbert.py @@ -14,11 +14,10 @@ # limitations under the License. """ - PyTorch DistilBERT model adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) and in - part from HuggingFace PyTorch version of Google AI Bert model (https://github.com/google-research/bert) +PyTorch DistilBERT model adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) and in +part from HuggingFace PyTorch version of Google AI Bert model (https://github.com/google-research/bert) """ - import math from typing import Dict, List, Optional, Set, Tuple, Union diff --git a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py index 1e663c9229..87dab93ca1 100644 --- a/src/transformers/models/distilbert/modeling_tf_distilbert.py +++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py @@ -13,10 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - TF 2.0 DistilBERT model +TF 2.0 DistilBERT model """ - from __future__ import annotations import warnings diff --git a/src/transformers/models/dit/convert_dit_unilm_to_pytorch.py b/src/transformers/models/dit/convert_dit_unilm_to_pytorch.py index c754b9bbf3..40c5b22e3b 100644 --- a/src/transformers/models/dit/convert_dit_unilm_to_pytorch.py +++ b/src/transformers/models/dit/convert_dit_unilm_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert DiT checkpoints from the unilm repository.""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/donut/configuration_donut_swin.py b/src/transformers/models/donut/configuration_donut_swin.py index 80418e7144..b9f9fae39c 100644 --- a/src/transformers/models/donut/configuration_donut_swin.py +++ b/src/transformers/models/donut/configuration_donut_swin.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Donut Swin Transformer model configuration""" +"""Donut Swin Transformer model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/donut/convert_donut_to_pytorch.py b/src/transformers/models/donut/convert_donut_to_pytorch.py index 13f669ad97..913bf2b64b 100644 --- a/src/transformers/models/donut/convert_donut_to_pytorch.py +++ b/src/transformers/models/donut/convert_donut_to_pytorch.py @@ -106,22 +106,22 @@ def convert_state_dict(orig_state_dict, model): orig_state_dict[ f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.weight" ] = val[:dim, :] - orig_state_dict[ - f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.weight" - ] = val[dim : dim * 2, :] + orig_state_dict[f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.weight"] = ( + val[dim : dim * 2, :] + ) orig_state_dict[ f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.weight" ] = val[-dim:, :] else: - orig_state_dict[ - f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.bias" - ] = val[:dim] - orig_state_dict[ - f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.bias" - ] = val[dim : dim * 2] - orig_state_dict[ - f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.bias" - ] = val[-dim:] + orig_state_dict[f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.bias"] = ( + val[:dim] + ) + orig_state_dict[f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.bias"] = ( + val[dim : dim * 2] + ) + orig_state_dict[f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.bias"] = ( + val[-dim:] + ) elif "attn_mask" in key or key in ["encoder.model.norm.weight", "encoder.model.norm.bias"]: # HuggingFace implementation doesn't use attn_mask buffer # and model doesn't use final LayerNorms for the encoder diff --git a/src/transformers/models/donut/modeling_donut_swin.py b/src/transformers/models/donut/modeling_donut_swin.py index 5b8a0e27b0..4775d00c19 100644 --- a/src/transformers/models/donut/modeling_donut_swin.py +++ b/src/transformers/models/donut/modeling_donut_swin.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Donut Swin Transformer model. +"""PyTorch Donut Swin Transformer model. This implementation is identical to a regular Swin Transformer, without final layer norm on top of the final hidden states.""" diff --git a/src/transformers/models/donut/processing_donut.py b/src/transformers/models/donut/processing_donut.py index 1f03fd6306..daf6e7d1df 100644 --- a/src/transformers/models/donut/processing_donut.py +++ b/src/transformers/models/donut/processing_donut.py @@ -15,6 +15,7 @@ """ Processor class for Donut. """ + import re import warnings from contextlib import contextmanager diff --git a/src/transformers/models/dpr/configuration_dpr.py b/src/transformers/models/dpr/configuration_dpr.py index 3d7abec3eb..b22da23ca4 100644 --- a/src/transformers/models/dpr/configuration_dpr.py +++ b/src/transformers/models/dpr/configuration_dpr.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" DPR model configuration""" +"""DPR model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/dpr/modeling_dpr.py b/src/transformers/models/dpr/modeling_dpr.py index 2094440a7f..7ba63f134c 100644 --- a/src/transformers/models/dpr/modeling_dpr.py +++ b/src/transformers/models/dpr/modeling_dpr.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch DPR model for Open Domain Question Answering.""" - +"""PyTorch DPR model for Open Domain Question Answering.""" from dataclasses import dataclass from typing import Optional, Tuple, Union diff --git a/src/transformers/models/dpr/modeling_tf_dpr.py b/src/transformers/models/dpr/modeling_tf_dpr.py index a2e539f73f..92a0e54cbb 100644 --- a/src/transformers/models/dpr/modeling_tf_dpr.py +++ b/src/transformers/models/dpr/modeling_tf_dpr.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" TensorFlow DPR model for Open Domain Question Answering.""" +"""TensorFlow DPR model for Open Domain Question Answering.""" from __future__ import annotations diff --git a/src/transformers/models/dpr/tokenization_dpr.py b/src/transformers/models/dpr/tokenization_dpr.py index 1362047ce2..45ce73425f 100644 --- a/src/transformers/models/dpr/tokenization_dpr.py +++ b/src/transformers/models/dpr/tokenization_dpr.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for DPR.""" - import collections from typing import List, Optional, Union diff --git a/src/transformers/models/dpr/tokenization_dpr_fast.py b/src/transformers/models/dpr/tokenization_dpr_fast.py index 730f200a68..69ac58a77d 100644 --- a/src/transformers/models/dpr/tokenization_dpr_fast.py +++ b/src/transformers/models/dpr/tokenization_dpr_fast.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for DPR.""" - import collections from typing import List, Optional, Union diff --git a/src/transformers/models/dpt/configuration_dpt.py b/src/transformers/models/dpt/configuration_dpt.py index b21864e9b0..7ae48f7519 100644 --- a/src/transformers/models/dpt/configuration_dpt.py +++ b/src/transformers/models/dpt/configuration_dpt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" DPT model configuration""" +"""DPT model configuration""" import copy diff --git a/src/transformers/models/dpt/convert_dinov2_depth_to_hf.py b/src/transformers/models/dpt/convert_dinov2_depth_to_hf.py index 2bd147096c..7b3715bddf 100644 --- a/src/transformers/models/dpt/convert_dinov2_depth_to_hf.py +++ b/src/transformers/models/dpt/convert_dinov2_depth_to_hf.py @@ -15,7 +15,6 @@ """Convert DINOv2 + DPT checkpoints from the original repository. URL: https://github.com/facebookresearch/dinov2/tree/main""" - import argparse import itertools import math diff --git a/src/transformers/models/dpt/convert_dpt_beit_to_hf.py b/src/transformers/models/dpt/convert_dpt_beit_to_hf.py index eb335a0ea2..3a576d772f 100644 --- a/src/transformers/models/dpt/convert_dpt_beit_to_hf.py +++ b/src/transformers/models/dpt/convert_dpt_beit_to_hf.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert DPT 3.1 checkpoints from the MiDaS repository. 
URL: https://github.com/isl-org/MiDaS""" - import argparse from pathlib import Path diff --git a/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py b/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py index 0fa69adfaf..1304acaafc 100644 --- a/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py +++ b/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert DPT checkpoints from the original repository. URL: https://github.com/isl-org/DPT""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/dpt/convert_dpt_swinv2_to_hf.py b/src/transformers/models/dpt/convert_dpt_swinv2_to_hf.py index fd6522ab6b..0feebe72d4 100644 --- a/src/transformers/models/dpt/convert_dpt_swinv2_to_hf.py +++ b/src/transformers/models/dpt/convert_dpt_swinv2_to_hf.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert DPT 3.1 checkpoints from the MiDaS repository. URL: https://github.com/isl-org/MiDaS""" - import argparse from pathlib import Path diff --git a/src/transformers/models/dpt/convert_dpt_to_pytorch.py b/src/transformers/models/dpt/convert_dpt_to_pytorch.py index 42637cb215..b55c96f0c7 100644 --- a/src/transformers/models/dpt/convert_dpt_to_pytorch.py +++ b/src/transformers/models/dpt/convert_dpt_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert DPT checkpoints from the original repository. URL: https://github.com/isl-org/DPT""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/dpt/modeling_dpt.py b/src/transformers/models/dpt/modeling_dpt.py index 5240b651cc..a15c9caca2 100755 --- a/src/transformers/models/dpt/modeling_dpt.py +++ b/src/transformers/models/dpt/modeling_dpt.py @@ -12,14 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch DPT (Dense Prediction Transformers) model. +"""PyTorch DPT (Dense Prediction Transformers) model. This implementation is heavily inspired by OpenMMLab's implementation, found here: https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/decode_heads/dpt_head.py. """ - import collections.abc import math from dataclasses import dataclass diff --git a/src/transformers/models/efficientformer/configuration_efficientformer.py b/src/transformers/models/efficientformer/configuration_efficientformer.py index 0cc7635ea0..a9fbfa7e05 100644 --- a/src/transformers/models/efficientformer/configuration_efficientformer.py +++ b/src/transformers/models/efficientformer/configuration_efficientformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" EfficientFormer model configuration""" +"""EfficientFormer model configuration""" from typing import List diff --git a/src/transformers/models/efficientformer/modeling_efficientformer.py b/src/transformers/models/efficientformer/modeling_efficientformer.py index 6d79fe5bae..44d2adbed4 100644 --- a/src/transformers/models/efficientformer/modeling_efficientformer.py +++ b/src/transformers/models/efficientformer/modeling_efficientformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch EfficientFormer model.""" +"""PyTorch EfficientFormer model.""" import itertools from dataclasses import dataclass diff --git a/src/transformers/models/efficientformer/modeling_tf_efficientformer.py b/src/transformers/models/efficientformer/modeling_tf_efficientformer.py index 605487e6fe..a8ce9dd306 100644 --- a/src/transformers/models/efficientformer/modeling_tf_efficientformer.py +++ b/src/transformers/models/efficientformer/modeling_tf_efficientformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TensorFlow EfficientFormer model.""" +"""TensorFlow EfficientFormer model.""" import itertools from dataclasses import dataclass diff --git a/src/transformers/models/efficientnet/configuration_efficientnet.py b/src/transformers/models/efficientnet/configuration_efficientnet.py index 63480a9a6d..4c7feb377f 100644 --- a/src/transformers/models/efficientnet/configuration_efficientnet.py +++ b/src/transformers/models/efficientnet/configuration_efficientnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" EfficientNet model configuration""" +"""EfficientNet model configuration""" from collections import OrderedDict from typing import List, Mapping diff --git a/src/transformers/models/efficientnet/modeling_efficientnet.py b/src/transformers/models/efficientnet/modeling_efficientnet.py index b047e4f89f..057cd42f2a 100644 --- a/src/transformers/models/efficientnet/modeling_efficientnet.py +++ b/src/transformers/models/efficientnet/modeling_efficientnet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch EfficientNet model.""" - +"""PyTorch EfficientNet model.""" import math from typing import Optional, Tuple, Union diff --git a/src/transformers/models/electra/configuration_electra.py b/src/transformers/models/electra/configuration_electra.py index f4ca304983..17be728ed6 100644 --- a/src/transformers/models/electra/configuration_electra.py +++ b/src/transformers/models/electra/configuration_electra.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ELECTRA model configuration""" +"""ELECTRA model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py index d5d6376d7b..b0abc30cd7 100644 --- a/src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Convert ELECTRA checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index b903e7f082..a289bb9728 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF Electra model.""" - +"""TF Electra model.""" from __future__ import annotations diff --git a/src/transformers/models/encodec/configuration_encodec.py b/src/transformers/models/encodec/configuration_encodec.py index 4d8611a178..bc10e8ffc3 100644 --- a/src/transformers/models/encodec/configuration_encodec.py +++ b/src/transformers/models/encodec/configuration_encodec.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" EnCodec model configuration""" - +"""EnCodec model configuration""" import math from typing import Optional diff --git a/src/transformers/models/encodec/modeling_encodec.py b/src/transformers/models/encodec/modeling_encodec.py index 0df2a2a6ca..9627742b9e 100644 --- a/src/transformers/models/encodec/modeling_encodec.py +++ b/src/transformers/models/encodec/modeling_encodec.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch EnCodec model.""" +"""PyTorch EnCodec model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py index 2b185cc14a..b568850060 100644 --- a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Classes to support Encoder-Decoder architectures""" - +"""Classes to support Encoder-Decoder architectures""" import gc import inspect diff --git a/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py index beecd08032..24b053969c 100644 --- a/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Classes to support Flax Encoder-Decoder architectures""" - +"""Classes to support Flax Encoder-Decoder architectures""" import os from typing import Optional, Tuple, Union diff --git a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py index 855fb767d1..85802b77f3 100644 --- a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Classes to support TF Encoder-Decoder architectures""" - +"""Classes to support TF Encoder-Decoder architectures""" from __future__ import annotations diff --git a/src/transformers/models/ernie/configuration_ernie.py b/src/transformers/models/ernie/configuration_ernie.py index 16f1448990..808a0c2722 100644 --- a/src/transformers/models/ernie/configuration_ernie.py +++ b/src/transformers/models/ernie/configuration_ernie.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ERNIE model configuration""" +"""ERNIE model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/ernie/modeling_ernie.py b/src/transformers/models/ernie/modeling_ernie.py index 4c2dc767f8..298465b6c9 100644 --- a/src/transformers/models/ernie/modeling_ernie.py +++ b/src/transformers/models/ernie/modeling_ernie.py @@ -14,7 +14,6 @@ # limitations under the License. """PyTorch ERNIE model.""" - import math import warnings from dataclasses import dataclass diff --git a/src/transformers/models/ernie_m/configuration_ernie_m.py b/src/transformers/models/ernie_m/configuration_ernie_m.py index 9c67a86421..cf34a510f2 100644 --- a/src/transformers/models/ernie_m/configuration_ernie_m.py +++ b/src/transformers/models/ernie_m/configuration_ernie_m.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ErnieM model configuration""" +"""ErnieM model configuration""" # Adapted from original paddlenlp repository.(https://github.com/PaddlePaddle/PaddleNLP/blob/develop/paddlenlp/transformers/ernie_m/configuration.py) from __future__ import annotations diff --git a/src/transformers/models/ernie_m/modeling_ernie_m.py b/src/transformers/models/ernie_m/modeling_ernie_m.py index b8f7e87c85..6b977801fe 100755 --- a/src/transformers/models/ernie_m/modeling_ernie_m.py +++ b/src/transformers/models/ernie_m/modeling_ernie_m.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch ErnieM model.""" - +"""PyTorch ErnieM model.""" import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/esm/configuration_esm.py b/src/transformers/models/esm/configuration_esm.py index feb06031e5..9634a20015 100644 --- a/src/transformers/models/esm/configuration_esm.py +++ b/src/transformers/models/esm/configuration_esm.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ESM model configuration""" +"""ESM model configuration""" from dataclasses import asdict, dataclass from typing import Optional diff --git a/src/transformers/models/esm/convert_esm.py b/src/transformers/models/esm/convert_esm.py index 22ca3f5392..020dd4e576 100644 --- a/src/transformers/models/esm/convert_esm.py +++ b/src/transformers/models/esm/convert_esm.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert ESM checkpoint.""" - import argparse import pathlib from pathlib import Path diff --git a/src/transformers/models/esm/modeling_esm.py b/src/transformers/models/esm/modeling_esm.py index e5683a3c99..08819b7f77 100755 --- a/src/transformers/models/esm/modeling_esm.py +++ b/src/transformers/models/esm/modeling_esm.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ESM model.""" +"""PyTorch ESM model.""" import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/esm/modeling_tf_esm.py b/src/transformers/models/esm/modeling_tf_esm.py index 2688c207b0..7cb673103d 100644 --- a/src/transformers/models/esm/modeling_tf_esm.py +++ b/src/transformers/models/esm/modeling_tf_esm.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ESM model.""" - +"""PyTorch ESM model.""" from __future__ import annotations diff --git a/src/transformers/models/esm/openfold_utils/feats.py b/src/transformers/models/esm/openfold_utils/feats.py index 18b01a1fec..ac7b90dfe7 100644 --- a/src/transformers/models/esm/openfold_utils/feats.py +++ b/src/transformers/models/esm/openfold_utils/feats.py @@ -25,15 +25,13 @@ from .tensor_utils import batched_gather @overload -def pseudo_beta_fn(aatype: torch.Tensor, all_atom_positions: torch.Tensor, all_atom_masks: None) -> torch.Tensor: - ... +def pseudo_beta_fn(aatype: torch.Tensor, all_atom_positions: torch.Tensor, all_atom_masks: None) -> torch.Tensor: ... @overload def pseudo_beta_fn( aatype: torch.Tensor, all_atom_positions: torch.Tensor, all_atom_masks: torch.Tensor -) -> Tuple[torch.Tensor, torch.Tensor]: - ... +) -> Tuple[torch.Tensor, torch.Tensor]: ... def pseudo_beta_fn(aatype, all_atom_positions, all_atom_masks): diff --git a/src/transformers/models/esm/openfold_utils/protein.py b/src/transformers/models/esm/openfold_utils/protein.py index 32e0157171..ae9d8c1327 100644 --- a/src/transformers/models/esm/openfold_utils/protein.py +++ b/src/transformers/models/esm/openfold_utils/protein.py @@ -14,6 +14,7 @@ # limitations under the License. 
"""Protein data type.""" + import dataclasses import re import string diff --git a/src/transformers/models/esm/openfold_utils/tensor_utils.py b/src/transformers/models/esm/openfold_utils/tensor_utils.py index 99dd6dbe47..20ee34b236 100644 --- a/src/transformers/models/esm/openfold_utils/tensor_utils.py +++ b/src/transformers/models/esm/openfold_utils/tensor_utils.py @@ -108,23 +108,19 @@ def dict_map( @overload -def tree_map(fn: Callable[[T], Any], tree: T, leaf_type: Type[T]) -> Any: - ... +def tree_map(fn: Callable[[T], Any], tree: T, leaf_type: Type[T]) -> Any: ... @overload -def tree_map(fn: Callable[[T], Any], tree: dict, leaf_type: Type[T]) -> dict: - ... +def tree_map(fn: Callable[[T], Any], tree: dict, leaf_type: Type[T]) -> dict: ... @overload -def tree_map(fn: Callable[[T], Any], tree: list, leaf_type: Type[T]) -> list: - ... +def tree_map(fn: Callable[[T], Any], tree: list, leaf_type: Type[T]) -> list: ... @overload -def tree_map(fn: Callable[[T], Any], tree: tuple, leaf_type: Type[T]) -> tuple: - ... +def tree_map(fn: Callable[[T], Any], tree: tuple, leaf_type: Type[T]) -> tuple: ... def tree_map(fn, tree, leaf_type): diff --git a/src/transformers/models/esm/tokenization_esm.py b/src/transformers/models/esm/tokenization_esm.py index 27a889c87e..fbb759c1d1 100644 --- a/src/transformers/models/esm/tokenization_esm.py +++ b/src/transformers/models/esm/tokenization_esm.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for ESM.""" + import os from typing import List, Optional diff --git a/src/transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py b/src/transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py index b8e1ae8512..ade5b8b266 100644 --- a/src/transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +++ b/src/transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" FastSpeech2Conformer model configuration""" +"""FastSpeech2Conformer model configuration""" from typing import Dict diff --git a/src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py b/src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py index 01c2cece55..e97e276b18 100644 --- a/src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +++ b/src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch FastSpeech2Conformer model.""" +"""PyTorch FastSpeech2Conformer model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py b/src/transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py index 5b979c8761..65c081c4c1 100644 --- a/src/transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +++ b/src/transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for FastSpeech2Conformer.""" + import json import os from typing import Optional, Tuple diff --git a/src/transformers/models/flaubert/configuration_flaubert.py b/src/transformers/models/flaubert/configuration_flaubert.py index 98939e7091..ae5e07245e 100644 --- a/src/transformers/models/flaubert/configuration_flaubert.py +++ b/src/transformers/models/flaubert/configuration_flaubert.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flaubert configuration""" +"""Flaubert configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/flaubert/modeling_flaubert.py b/src/transformers/models/flaubert/modeling_flaubert.py index 86db691557..50c6f7ede2 100644 --- a/src/transformers/models/flaubert/modeling_flaubert.py +++ b/src/transformers/models/flaubert/modeling_flaubert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Flaubert model, based on XLM.""" +"""PyTorch Flaubert model, based on XLM.""" import itertools import math diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py b/src/transformers/models/flaubert/modeling_tf_flaubert.py index 0538fa061c..71e371da24 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -13,10 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - TF 2.0 Flaubert model. +TF 2.0 Flaubert model. """ - from __future__ import annotations import itertools diff --git a/src/transformers/models/flaubert/tokenization_flaubert.py b/src/transformers/models/flaubert/tokenization_flaubert.py index 20f9926422..d8598a5a99 100644 --- a/src/transformers/models/flaubert/tokenization_flaubert.py +++ b/src/transformers/models/flaubert/tokenization_flaubert.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for Flaubert.""" - import json import os import re diff --git a/src/transformers/models/flava/configuration_flava.py b/src/transformers/models/flava/configuration_flava.py index b78b5fdeac..9a6da69193 100644 --- a/src/transformers/models/flava/configuration_flava.py +++ b/src/transformers/models/flava/configuration_flava.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" FLAVA model configurations""" +"""FLAVA model configurations""" import os from typing import Any, Dict, Union diff --git a/src/transformers/models/flava/modeling_flava.py b/src/transformers/models/flava/modeling_flava.py index 661bfb2dce..5acbad05c3 100644 --- a/src/transformers/models/flava/modeling_flava.py +++ b/src/transformers/models/flava/modeling_flava.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch FLAVA model.""" +"""PyTorch FLAVA model.""" import collections import math diff --git a/src/transformers/models/fnet/configuration_fnet.py b/src/transformers/models/fnet/configuration_fnet.py index 18ad618799..90b77fc5d7 100644 --- a/src/transformers/models/fnet/configuration_fnet.py +++ b/src/transformers/models/fnet/configuration_fnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" FNet model configuration""" +"""FNet model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/fnet/convert_fnet_original_flax_checkpoint_to_pytorch.py b/src/transformers/models/fnet/convert_fnet_original_flax_checkpoint_to_pytorch.py index f77a44874a..71660354db 100644 --- a/src/transformers/models/fnet/convert_fnet_original_flax_checkpoint_to_pytorch.py +++ b/src/transformers/models/fnet/convert_fnet_original_flax_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert FNet checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/fnet/modeling_fnet.py b/src/transformers/models/fnet/modeling_fnet.py index a29a6f8016..8221af6d76 100755 --- a/src/transformers/models/fnet/modeling_fnet.py +++ b/src/transformers/models/fnet/modeling_fnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch FNet model.""" +"""PyTorch FNet model.""" import warnings from dataclasses import dataclass diff --git a/src/transformers/models/fnet/tokenization_fnet.py b/src/transformers/models/fnet/tokenization_fnet.py index a38114eb6d..29095c80ff 100644 --- a/src/transformers/models/fnet/tokenization_fnet.py +++ b/src/transformers/models/fnet/tokenization_fnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization classes for FNet model.""" +"""Tokenization classes for FNet model.""" import os import unicodedata diff --git a/src/transformers/models/fnet/tokenization_fnet_fast.py b/src/transformers/models/fnet/tokenization_fnet_fast.py index f279ad9ca7..3136b9f27c 100644 --- a/src/transformers/models/fnet/tokenization_fnet_fast.py +++ b/src/transformers/models/fnet/tokenization_fnet_fast.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Tokenization classes for FNet model.""" - +"""Tokenization classes for FNet model.""" import os from shutil import copyfile diff --git a/src/transformers/models/focalnet/configuration_focalnet.py b/src/transformers/models/focalnet/configuration_focalnet.py index 1b2cca8b48..577530e2ec 100644 --- a/src/transformers/models/focalnet/configuration_focalnet.py +++ b/src/transformers/models/focalnet/configuration_focalnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" FocalNet model configuration""" +"""FocalNet model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/focalnet/modeling_focalnet.py b/src/transformers/models/focalnet/modeling_focalnet.py index 5958eb6a0e..99f2dc658f 100644 --- a/src/transformers/models/focalnet/modeling_focalnet.py +++ b/src/transformers/models/focalnet/modeling_focalnet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch FocalNet model.""" - +"""PyTorch FocalNet model.""" import collections.abc import math diff --git a/src/transformers/models/fsmt/configuration_fsmt.py b/src/transformers/models/fsmt/configuration_fsmt.py index 7ed34a6792..72af4ddab2 100644 --- a/src/transformers/models/fsmt/configuration_fsmt.py +++ b/src/transformers/models/fsmt/configuration_fsmt.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" FSMT configuration""" - +"""FSMT configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/fsmt/tokenization_fsmt.py b/src/transformers/models/fsmt/tokenization_fsmt.py index 0df585ed46..d1f1ee4cac 100644 --- a/src/transformers/models/fsmt/tokenization_fsmt.py +++ b/src/transformers/models/fsmt/tokenization_fsmt.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for FSMT.""" - import json import os import re diff --git a/src/transformers/models/funnel/configuration_funnel.py b/src/transformers/models/funnel/configuration_funnel.py index c3d6ff9ee4..53d072d4c8 100644 --- a/src/transformers/models/funnel/configuration_funnel.py +++ b/src/transformers/models/funnel/configuration_funnel.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Funnel Transformer model configuration""" +"""Funnel Transformer model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py index 848101f083..4eab188f2a 100755 --- a/src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Convert Funnel checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/funnel/modeling_funnel.py b/src/transformers/models/funnel/modeling_funnel.py index 1e7eaee161..b4fdfd5fc5 100644 --- a/src/transformers/models/funnel/modeling_funnel.py +++ b/src/transformers/models/funnel/modeling_funnel.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Funnel Transformer model.""" +"""PyTorch Funnel Transformer model.""" import os from dataclasses import dataclass diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py index dea3ad550c..ab5f14a4c6 100644 --- a/src/transformers/models/funnel/modeling_tf_funnel.py +++ b/src/transformers/models/funnel/modeling_tf_funnel.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 Funnel model.""" - +"""TF 2.0 Funnel model.""" from __future__ import annotations diff --git a/src/transformers/models/funnel/tokenization_funnel.py b/src/transformers/models/funnel/tokenization_funnel.py index a1580deccf..6a710d660c 100644 --- a/src/transformers/models/funnel/tokenization_funnel.py +++ b/src/transformers/models/funnel/tokenization_funnel.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for Funnel Transformer.""" +"""Tokenization class for Funnel Transformer.""" import collections import os diff --git a/src/transformers/models/funnel/tokenization_funnel_fast.py b/src/transformers/models/funnel/tokenization_funnel_fast.py index 9ff2a3bfef..6a48f2f54a 100644 --- a/src/transformers/models/funnel/tokenization_funnel_fast.py +++ b/src/transformers/models/funnel/tokenization_funnel_fast.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for Funnel Transformer.""" +"""Tokenization class for Funnel Transformer.""" import json from typing import List, Optional, Tuple diff --git a/src/transformers/models/fuyu/configuration_fuyu.py b/src/transformers/models/fuyu/configuration_fuyu.py index 8a5013a651..6cf666d7ee 100644 --- a/src/transformers/models/fuyu/configuration_fuyu.py +++ b/src/transformers/models/fuyu/configuration_fuyu.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Fuyu model configuration""" +"""Fuyu model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/fuyu/modeling_fuyu.py b/src/transformers/models/fuyu/modeling_fuyu.py index bdaec5f868..e716e9f334 100644 --- a/src/transformers/models/fuyu/modeling_fuyu.py +++ b/src/transformers/models/fuyu/modeling_fuyu.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch Fuyu model.""" +"""PyTorch Fuyu model.""" + from typing import List, Optional, Tuple, Union import torch diff --git a/src/transformers/models/fuyu/processing_fuyu.py b/src/transformers/models/fuyu/processing_fuyu.py index ffa215f1a0..2e46cabfa3 100644 --- a/src/transformers/models/fuyu/processing_fuyu.py +++ b/src/transformers/models/fuyu/processing_fuyu.py @@ -15,6 +15,7 @@ """ Image/Text processor class for GIT """ + import re from typing import Dict, List, Optional, Tuple, Union diff --git a/src/transformers/models/gemma/configuration_gemma.py b/src/transformers/models/gemma/configuration_gemma.py index c8a5504516..3bf296a63b 100644 --- a/src/transformers/models/gemma/configuration_gemma.py +++ b/src/transformers/models/gemma/configuration_gemma.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Gemma model configuration""" +"""Gemma model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/gemma/modeling_flax_gemma.py b/src/transformers/models/gemma/modeling_flax_gemma.py index 235f65680f..16291f3c3a 100644 --- a/src/transformers/models/gemma/modeling_flax_gemma.py +++ b/src/transformers/models/gemma/modeling_flax_gemma.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Flax Gemma model.""" + from typing import Optional, Tuple import flax.linen as nn diff --git a/src/transformers/models/gemma/modeling_gemma.py b/src/transformers/models/gemma/modeling_gemma.py index 6003c91cee..565a976fd7 100644 --- a/src/transformers/models/gemma/modeling_gemma.py +++ b/src/transformers/models/gemma/modeling_gemma.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Gemma model.""" +"""PyTorch Gemma model.""" import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/gemma/tokenization_gemma.py b/src/transformers/models/gemma/tokenization_gemma.py index ab19ee23c7..f70c6e807e 100644 --- a/src/transformers/models/gemma/tokenization_gemma.py +++ b/src/transformers/models/gemma/tokenization_gemma.py @@ -14,6 +14,7 @@ # limitations under the License. """Tokenization classes for Gemma.""" + import os from shutil import copyfile from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple diff --git a/src/transformers/models/git/convert_git_to_pytorch.py b/src/transformers/models/git/convert_git_to_pytorch.py index 4e3e8e7b31..238b8124a0 100644 --- a/src/transformers/models/git/convert_git_to_pytorch.py +++ b/src/transformers/models/git/convert_git_to_pytorch.py @@ -16,7 +16,6 @@ URL: https://github.com/microsoft/GenerativeImage2Text/tree/main""" - import argparse from pathlib import Path diff --git a/src/transformers/models/git/modeling_git.py b/src/transformers/models/git/modeling_git.py index 87b0cb8073..8e14e3a899 100644 --- a/src/transformers/models/git/modeling_git.py +++ b/src/transformers/models/git/modeling_git.py @@ -15,7 +15,6 @@ # limitations under the License. 
"""PyTorch GIT model.""" - import math from dataclasses import dataclass from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/glpn/configuration_glpn.py b/src/transformers/models/glpn/configuration_glpn.py index e1e4b53505..88e1d6e1f0 100644 --- a/src/transformers/models/glpn/configuration_glpn.py +++ b/src/transformers/models/glpn/configuration_glpn.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" GLPN model configuration""" +"""GLPN model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/glpn/convert_glpn_to_pytorch.py b/src/transformers/models/glpn/convert_glpn_to_pytorch.py index 5f0183783e..e19ee93819 100644 --- a/src/transformers/models/glpn/convert_glpn_to_pytorch.py +++ b/src/transformers/models/glpn/convert_glpn_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert GLPN checkpoints.""" - import argparse from collections import OrderedDict from pathlib import Path diff --git a/src/transformers/models/glpn/modeling_glpn.py b/src/transformers/models/glpn/modeling_glpn.py index a9a8bc9b36..9fd22ca0f7 100755 --- a/src/transformers/models/glpn/modeling_glpn.py +++ b/src/transformers/models/glpn/modeling_glpn.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch GLPN model.""" - +"""PyTorch GLPN model.""" import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/gpt2/configuration_gpt2.py b/src/transformers/models/gpt2/configuration_gpt2.py index 249decbbaa..82a2491295 100644 --- a/src/transformers/models/gpt2/configuration_gpt2.py +++ b/src/transformers/models/gpt2/configuration_gpt2.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" OpenAI GPT-2 configuration""" +"""OpenAI GPT-2 configuration""" + from collections import OrderedDict from typing import Any, List, Mapping, Optional diff --git a/src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py index 066ba06503..33f9dabed0 100755 --- a/src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert OpenAI GPT checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index c6f5883e22..379cbbfaf1 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" TF 2.0 OpenAI GPT-2 model.""" +"""TF 2.0 OpenAI GPT-2 model.""" from __future__ import annotations diff --git a/src/transformers/models/gpt2/tokenization_gpt2.py b/src/transformers/models/gpt2/tokenization_gpt2.py index 9e81b4473e..9bca559d9e 100644 --- a/src/transformers/models/gpt2/tokenization_gpt2.py +++ b/src/transformers/models/gpt2/tokenization_gpt2.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for OpenAI GPT.""" - import json import os from functools import lru_cache diff --git a/src/transformers/models/gpt2/tokenization_gpt2_fast.py b/src/transformers/models/gpt2/tokenization_gpt2_fast.py index 39e59298c8..e6747119f4 100644 --- a/src/transformers/models/gpt2/tokenization_gpt2_fast.py +++ b/src/transformers/models/gpt2/tokenization_gpt2_fast.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for OpenAI GPT.""" - import json from typing import Optional, Tuple diff --git a/src/transformers/models/gpt_bigcode/configuration_gpt_bigcode.py b/src/transformers/models/gpt_bigcode/configuration_gpt_bigcode.py index 0c295e17a1..5bd72d23f9 100644 --- a/src/transformers/models/gpt_bigcode/configuration_gpt_bigcode.py +++ b/src/transformers/models/gpt_bigcode/configuration_gpt_bigcode.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" GPTBigCode configuration""" +"""GPTBigCode configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py b/src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py index 7b7bfaf1d4..e03a1a3394 100644 --- a/src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +++ b/src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """PyTorch GPTBigCode model.""" + import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/gpt_neo/configuration_gpt_neo.py b/src/transformers/models/gpt_neo/configuration_gpt_neo.py index 66c3b6812d..a3c261e855 100644 --- a/src/transformers/models/gpt_neo/configuration_gpt_neo.py +++ b/src/transformers/models/gpt_neo/configuration_gpt_neo.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" GPT Neo model configuration""" +"""GPT Neo model configuration""" from collections import OrderedDict from typing import Any, Mapping, Optional diff --git a/src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py b/src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py index 4a5fddd0a9..3db2285729 100644 --- a/src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py +++ b/src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Convert GPT Neo checkpoint.""" - import argparse import json diff --git a/src/transformers/models/gpt_neo/modeling_gpt_neo.py b/src/transformers/models/gpt_neo/modeling_gpt_neo.py index b2891526b3..694c0bc88b 100755 --- a/src/transformers/models/gpt_neo/modeling_gpt_neo.py +++ b/src/transformers/models/gpt_neo/modeling_gpt_neo.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch GPT Neo model.""" - +"""PyTorch GPT Neo model.""" import os from typing import Optional, Tuple, Union diff --git a/src/transformers/models/gpt_neox/configuration_gpt_neox.py b/src/transformers/models/gpt_neox/configuration_gpt_neox.py index d559148a72..8e4c94692e 100644 --- a/src/transformers/models/gpt_neox/configuration_gpt_neox.py +++ b/src/transformers/models/gpt_neox/configuration_gpt_neox.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" GPTNeoX model configuration""" +"""GPTNeoX model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/gpt_neox/modeling_gpt_neox.py b/src/transformers/models/gpt_neox/modeling_gpt_neox.py index 4980f7c636..ba2fb8aa76 100755 --- a/src/transformers/models/gpt_neox/modeling_gpt_neox.py +++ b/src/transformers/models/gpt_neox/modeling_gpt_neox.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch GPTNeoX model.""" +"""PyTorch GPTNeoX model.""" from typing import Optional, Tuple, Union diff --git a/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py b/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py index ba54a1dc7d..2504fa3cc0 100644 --- a/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py +++ b/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for GPTNeoX.""" + import json from typing import List, Optional, Tuple diff --git a/src/transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py b/src/transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py index bde828790d..d3c18a3643 100644 --- a/src/transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py +++ b/src/transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" GPTNeoX Japanese model configuration""" +"""GPTNeoX Japanese model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py b/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py index 24d2113178..b9c4cad0fd 100755 --- a/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +++ b/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch GPTNeoX model.""" +"""PyTorch GPTNeoX model.""" from typing import Optional, Tuple, Union diff --git a/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py b/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py index b761e539a5..f36f7e3fd6 100644 --- a/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py +++ b/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for GPTNeoXJapanese.""" + import collections import json import os diff --git a/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py b/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py index 5562efa287..2625701c1a 100644 --- a/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py +++ b/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Convert GPT-SW3 megatron checkpoints to pytorch""" +"""Convert GPT-SW3 megatron checkpoints to pytorch""" import argparse import os diff --git a/src/transformers/models/gptj/configuration_gptj.py b/src/transformers/models/gptj/configuration_gptj.py index c9c6c4a1c5..1b93f259b0 100644 --- a/src/transformers/models/gptj/configuration_gptj.py +++ b/src/transformers/models/gptj/configuration_gptj.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" GPT-J model configuration""" +"""GPT-J model configuration""" + from collections import OrderedDict from typing import Any, List, Mapping, Optional diff --git a/src/transformers/models/gptj/modeling_gptj.py b/src/transformers/models/gptj/modeling_gptj.py index 5f1e1d6612..96f4197a87 100644 --- a/src/transformers/models/gptj/modeling_gptj.py +++ b/src/transformers/models/gptj/modeling_gptj.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch GPT-J model.""" +"""PyTorch GPT-J model.""" import warnings from typing import Optional, Tuple, Union diff --git a/src/transformers/models/gptj/modeling_tf_gptj.py b/src/transformers/models/gptj/modeling_tf_gptj.py index 5c315b5b66..b20512b19d 100644 --- a/src/transformers/models/gptj/modeling_tf_gptj.py +++ b/src/transformers/models/gptj/modeling_tf_gptj.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 GPT-J model.""" +"""TF 2.0 GPT-J model.""" from __future__ import annotations diff --git a/src/transformers/models/gptsan_japanese/configuration_gptsan_japanese.py b/src/transformers/models/gptsan_japanese/configuration_gptsan_japanese.py index 7630d67bff..23295f3510 100644 --- a/src/transformers/models/gptsan_japanese/configuration_gptsan_japanese.py +++ b/src/transformers/models/gptsan_japanese/configuration_gptsan_japanese.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" GPTSAN-japanese model configuration""" +"""GPTSAN-japanese model configuration""" + from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/gptsan_japanese/modeling_gptsan_japanese.py b/src/transformers/models/gptsan_japanese/modeling_gptsan_japanese.py index f897a26483..7faafd9efb 100644 --- a/src/transformers/models/gptsan_japanese/modeling_gptsan_japanese.py +++ b/src/transformers/models/gptsan_japanese/modeling_gptsan_japanese.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch GPTSANJapanese model.""" - +"""PyTorch GPTSANJapanese model.""" import copy from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py b/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py index 627f7a9b28..b21b8a6f23 100644 --- a/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py +++ b/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for GPTSANJapanese.""" + import collections import json import os diff --git a/src/transformers/models/graphormer/configuration_graphormer.py b/src/transformers/models/graphormer/configuration_graphormer.py index 7aaba3aa76..9f6904ef38 100644 --- a/src/transformers/models/graphormer/configuration_graphormer.py +++ b/src/transformers/models/graphormer/configuration_graphormer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Graphormer model configuration""" +"""Graphormer model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/graphormer/modeling_graphormer.py b/src/transformers/models/graphormer/modeling_graphormer.py index 45ffb91a91..f2696a586b 100755 --- a/src/transformers/models/graphormer/modeling_graphormer.py +++ b/src/transformers/models/graphormer/modeling_graphormer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Graphormer model.""" +"""PyTorch Graphormer model.""" import math from typing import Iterable, Iterator, List, Optional, Tuple, Union diff --git a/src/transformers/models/grounding_dino/configuration_grounding_dino.py b/src/transformers/models/grounding_dino/configuration_grounding_dino.py index 177b4bab7e..4c70abf5bd 100644 --- a/src/transformers/models/grounding_dino/configuration_grounding_dino.py +++ b/src/transformers/models/grounding_dino/configuration_grounding_dino.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Grounding DINO model configuration""" +"""Grounding DINO model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/grounding_dino/modeling_grounding_dino.py b/src/transformers/models/grounding_dino/modeling_grounding_dino.py index 72fc14edeb..1afe3ad44c 100644 --- a/src/transformers/models/grounding_dino/modeling_grounding_dino.py +++ b/src/transformers/models/grounding_dino/modeling_grounding_dino.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Grounding DINO model.""" +"""PyTorch Grounding DINO model.""" import copy import math diff --git a/src/transformers/models/groupvit/configuration_groupvit.py b/src/transformers/models/groupvit/configuration_groupvit.py index 1f17a0a7e6..4051fd069d 100644 --- a/src/transformers/models/groupvit/configuration_groupvit.py +++ b/src/transformers/models/groupvit/configuration_groupvit.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" GroupViT model configuration""" +"""GroupViT model configuration""" import os from collections import OrderedDict diff --git a/src/transformers/models/groupvit/modeling_groupvit.py b/src/transformers/models/groupvit/modeling_groupvit.py index aa8e803357..99be160319 100644 --- a/src/transformers/models/groupvit/modeling_groupvit.py +++ b/src/transformers/models/groupvit/modeling_groupvit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch GroupViT model.""" - +"""PyTorch GroupViT model.""" import collections.abc import math diff --git a/src/transformers/models/groupvit/modeling_tf_groupvit.py b/src/transformers/models/groupvit/modeling_tf_groupvit.py index 78f7f793c3..f06c5f57f8 100644 --- a/src/transformers/models/groupvit/modeling_tf_groupvit.py +++ b/src/transformers/models/groupvit/modeling_tf_groupvit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 GroupViT model.""" - +"""TF 2.0 GroupViT model.""" from __future__ import annotations diff --git a/src/transformers/models/hubert/configuration_hubert.py b/src/transformers/models/hubert/configuration_hubert.py index 293b9f76f2..20977cff87 100644 --- a/src/transformers/models/hubert/configuration_hubert.py +++ b/src/transformers/models/hubert/configuration_hubert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Hubert model configuration""" +"""Hubert model configuration""" import functools import operator diff --git a/src/transformers/models/hubert/convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/hubert/convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py index 571761e022..f5914f35c5 100644 --- a/src/transformers/models/hubert/convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py +++ b/src/transformers/models/hubert/convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Hubert checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py index 9a70fb6db7..6478fdadf1 100644 --- a/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Hubert checkpoint.""" - import argparse import json import os diff --git a/src/transformers/models/hubert/convert_hubert_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/hubert/convert_hubert_original_s3prl_checkpoint_to_pytorch.py index 51908f9302..ff15b90088 100644 --- a/src/transformers/models/hubert/convert_hubert_original_s3prl_checkpoint_to_pytorch.py +++ b/src/transformers/models/hubert/convert_hubert_original_s3prl_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Hubert checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/hubert/modeling_hubert.py b/src/transformers/models/hubert/modeling_hubert.py index 83d90f5ded..388d16415c 100755 --- a/src/transformers/models/hubert/modeling_hubert.py +++ b/src/transformers/models/hubert/modeling_hubert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch Hubert model.""" +"""PyTorch Hubert model.""" import warnings from typing import Optional, Tuple, Union diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py index 142ab0961d..4c31fc78c2 100644 --- a/src/transformers/models/hubert/modeling_tf_hubert.py +++ b/src/transformers/models/hubert/modeling_tf_hubert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TensorFlow Hubert model.""" +"""TensorFlow Hubert model.""" from __future__ import annotations diff --git a/src/transformers/models/ibert/configuration_ibert.py b/src/transformers/models/ibert/configuration_ibert.py index afa67d6d6a..9af660669d 100644 --- a/src/transformers/models/ibert/configuration_ibert.py +++ b/src/transformers/models/ibert/configuration_ibert.py @@ -14,7 +14,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" I-BERT configuration""" +"""I-BERT configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/idefics/configuration_idefics.py b/src/transformers/models/idefics/configuration_idefics.py index 8b61238ed9..e286ef3705 100644 --- a/src/transformers/models/idefics/configuration_idefics.py +++ b/src/transformers/models/idefics/configuration_idefics.py @@ -17,7 +17,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Idefics model configuration""" +"""Idefics model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/idefics/modeling_idefics.py b/src/transformers/models/idefics/modeling_idefics.py index 2ed51413d9..6d65825986 100644 --- a/src/transformers/models/idefics/modeling_idefics.py +++ b/src/transformers/models/idefics/modeling_idefics.py @@ -17,7 +17,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Idefics model.""" +"""PyTorch Idefics model.""" + from dataclasses import dataclass from typing import Any, Dict, List, Optional, Tuple, Union diff --git a/src/transformers/models/idefics/modeling_tf_idefics.py b/src/transformers/models/idefics/modeling_tf_idefics.py index 8d9322b0ed..c5ce2935d3 100644 --- a/src/transformers/models/idefics/modeling_tf_idefics.py +++ b/src/transformers/models/idefics/modeling_tf_idefics.py @@ -17,7 +17,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 Idefics model. 
""" +"""TF 2.0 Idefics model.""" from __future__ import annotations diff --git a/src/transformers/models/idefics/perceiver.py b/src/transformers/models/idefics/perceiver.py index 888c5b0bb9..91e80f8516 100644 --- a/src/transformers/models/idefics/perceiver.py +++ b/src/transformers/models/idefics/perceiver.py @@ -36,6 +36,7 @@ References: - Code borrowed w/ love from: https://github.com/lucidrains/flamingo-pytorch """ + from typing import Optional, Tuple import torch diff --git a/src/transformers/models/idefics/perceiver_tf.py b/src/transformers/models/idefics/perceiver_tf.py index c9e76004a7..0f1c615349 100644 --- a/src/transformers/models/idefics/perceiver_tf.py +++ b/src/transformers/models/idefics/perceiver_tf.py @@ -36,6 +36,7 @@ References: - Code borrowed w/ love from: https://github.com/lucidrains/flamingo-pytorch """ + from typing import Optional, Tuple import tensorflow as tf diff --git a/src/transformers/models/idefics/vision.py b/src/transformers/models/idefics/vision.py index d90f837b3c..847e92e89c 100644 --- a/src/transformers/models/idefics/vision.py +++ b/src/transformers/models/idefics/vision.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch IdeficsVision model: a copy of CLIPVisionModel using a simpler config object""" - +"""PyTorch IdeficsVision model: a copy of CLIPVisionModel using a simpler config object""" import math from dataclasses import dataclass diff --git a/src/transformers/models/idefics/vision_tf.py b/src/transformers/models/idefics/vision_tf.py index 0060bb7ac9..7acfa01939 100644 --- a/src/transformers/models/idefics/vision_tf.py +++ b/src/transformers/models/idefics/vision_tf.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF IdeficsVision model: a copy of CLIPVisionModel using a simpler config object""" - +"""TF IdeficsVision model: a copy of CLIPVisionModel using a simpler config object""" import math from dataclasses import dataclass diff --git a/src/transformers/models/imagegpt/configuration_imagegpt.py b/src/transformers/models/imagegpt/configuration_imagegpt.py index 906e61eeef..c54c11491c 100644 --- a/src/transformers/models/imagegpt/configuration_imagegpt.py +++ b/src/transformers/models/imagegpt/configuration_imagegpt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" OpenAI ImageGPT configuration""" +"""OpenAI ImageGPT configuration""" from collections import OrderedDict from typing import TYPE_CHECKING, Any, Mapping, Optional diff --git a/src/transformers/models/imagegpt/convert_imagegpt_original_tf2_to_pytorch.py b/src/transformers/models/imagegpt/convert_imagegpt_original_tf2_to_pytorch.py index 0212bd485b..182d66b9af 100644 --- a/src/transformers/models/imagegpt/convert_imagegpt_original_tf2_to_pytorch.py +++ b/src/transformers/models/imagegpt/convert_imagegpt_original_tf2_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Convert OpenAI Image GPT checkpoints.""" - import argparse import torch diff --git a/src/transformers/models/informer/modeling_informer.py b/src/transformers/models/informer/modeling_informer.py index 844bf474fa..6b5507a015 100644 --- a/src/transformers/models/informer/modeling_informer.py +++ b/src/transformers/models/informer/modeling_informer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Informer model.""" +"""PyTorch Informer model.""" from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/instructblip/configuration_instructblip.py b/src/transformers/models/instructblip/configuration_instructblip.py index 23b743ecb5..636d18b990 100644 --- a/src/transformers/models/instructblip/configuration_instructblip.py +++ b/src/transformers/models/instructblip/configuration_instructblip.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" InstructBLIP model configuration""" +"""InstructBLIP model configuration""" import os from typing import Union diff --git a/src/transformers/models/instructblip/modeling_instructblip.py b/src/transformers/models/instructblip/modeling_instructblip.py index 0fb089b627..386b69cd3b 100644 --- a/src/transformers/models/instructblip/modeling_instructblip.py +++ b/src/transformers/models/instructblip/modeling_instructblip.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch InstructBLIP model.""" +"""PyTorch InstructBLIP model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/jamba/configuration_jamba.py b/src/transformers/models/jamba/configuration_jamba.py index de9cd378bd..6394c74012 100644 --- a/src/transformers/models/jamba/configuration_jamba.py +++ b/src/transformers/models/jamba/configuration_jamba.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Jamba model configuration""" +"""Jamba model configuration""" + import math from ...configuration_utils import PretrainedConfig diff --git a/src/transformers/models/jamba/modeling_jamba.py b/src/transformers/models/jamba/modeling_jamba.py index 5a582acdd2..3510b8fb5a 100755 --- a/src/transformers/models/jamba/modeling_jamba.py +++ b/src/transformers/models/jamba/modeling_jamba.py @@ -17,7 +17,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Jamba model.""" +"""PyTorch Jamba model.""" + import inspect import math from typing import Any, Dict, List, Optional, Tuple, Union diff --git a/src/transformers/models/jukebox/configuration_jukebox.py b/src/transformers/models/jukebox/configuration_jukebox.py index 19203732a9..a2eee03885 100644 --- a/src/transformers/models/jukebox/configuration_jukebox.py +++ b/src/transformers/models/jukebox/configuration_jukebox.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" Jukebox configuration""" +"""Jukebox configuration""" import os from typing import List, Union diff --git a/src/transformers/models/jukebox/tokenization_jukebox.py b/src/transformers/models/jukebox/tokenization_jukebox.py index cd478d6f6b..4952adda64 100644 --- a/src/transformers/models/jukebox/tokenization_jukebox.py +++ b/src/transformers/models/jukebox/tokenization_jukebox.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for OpenAI Jukebox.""" - import json import os import re diff --git a/src/transformers/models/kosmos2/configuration_kosmos2.py b/src/transformers/models/kosmos2/configuration_kosmos2.py index f922b60934..fc5f15dcae 100644 --- a/src/transformers/models/kosmos2/configuration_kosmos2.py +++ b/src/transformers/models/kosmos2/configuration_kosmos2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" KOSMOS-2 model configuration""" +"""KOSMOS-2 model configuration""" import os from typing import Union diff --git a/src/transformers/models/kosmos2/modeling_kosmos2.py b/src/transformers/models/kosmos2/modeling_kosmos2.py index 161ebbf95c..9585bd891e 100644 --- a/src/transformers/models/kosmos2/modeling_kosmos2.py +++ b/src/transformers/models/kosmos2/modeling_kosmos2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch KOSMOS-2 model.""" - +"""PyTorch KOSMOS-2 model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/layoutlm/configuration_layoutlm.py b/src/transformers/models/layoutlm/configuration_layoutlm.py index 88081831bc..4198bb26e9 100644 --- a/src/transformers/models/layoutlm/configuration_layoutlm.py +++ b/src/transformers/models/layoutlm/configuration_layoutlm.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" LayoutLM model configuration""" +"""LayoutLM model configuration""" + from collections import OrderedDict from typing import Any, List, Mapping, Optional diff --git a/src/transformers/models/layoutlm/modeling_layoutlm.py b/src/transformers/models/layoutlm/modeling_layoutlm.py index dbaa1aff81..4a761fcc0d 100644 --- a/src/transformers/models/layoutlm/modeling_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_layoutlm.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch LayoutLM model.""" - +"""PyTorch LayoutLM model.""" import math from typing import Optional, Tuple, Union diff --git a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py index 42bcbe00d9..5e95f3a3b5 100644 --- a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" TF 2.0 LayoutLM model.""" - +"""TF 2.0 LayoutLM model.""" from __future__ import annotations diff --git a/src/transformers/models/layoutlm/tokenization_layoutlm.py b/src/transformers/models/layoutlm/tokenization_layoutlm.py index 836b1aab88..fa6a5f29e9 100644 --- a/src/transformers/models/layoutlm/tokenization_layoutlm.py +++ b/src/transformers/models/layoutlm/tokenization_layoutlm.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for model LayoutLM.""" +"""Tokenization class for model LayoutLM.""" import collections import os diff --git a/src/transformers/models/layoutlm/tokenization_layoutlm_fast.py b/src/transformers/models/layoutlm/tokenization_layoutlm_fast.py index fa3d95132b..db1409dfca 100644 --- a/src/transformers/models/layoutlm/tokenization_layoutlm_fast.py +++ b/src/transformers/models/layoutlm/tokenization_layoutlm_fast.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for model LayoutLM.""" +"""Tokenization class for model LayoutLM.""" import json from typing import List, Optional, Tuple diff --git a/src/transformers/models/layoutlmv2/configuration_layoutlmv2.py b/src/transformers/models/layoutlmv2/configuration_layoutlmv2.py index 6a2eb9ff39..db1fdf7da2 100644 --- a/src/transformers/models/layoutlmv2/configuration_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/configuration_layoutlmv2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" LayoutLMv2 model configuration""" +"""LayoutLMv2 model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import is_detectron2_available, logging diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py index 3e3b7f2b14..70cb1f6e9f 100755 --- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch LayoutLMv2 model.""" +"""PyTorch LayoutLMv2 model.""" import math from typing import Optional, Tuple, Union diff --git a/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py b/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py index 592adaa2f5..aa50a3228e 100644 --- a/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" LayoutLMv3 model configuration""" +"""LayoutLMv3 model configuration""" from collections import OrderedDict from typing import TYPE_CHECKING, Any, Mapping, Optional diff --git a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py index 7479754f0f..6415f43247 100644 --- a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py @@ -14,7 +14,6 @@ # limitations under the License. """TF 2.0 LayoutLMv3 model.""" - from __future__ import annotations import collections diff --git a/src/transformers/models/layoutxlm/processing_layoutxlm.py b/src/transformers/models/layoutxlm/processing_layoutxlm.py index b1d885255b..1cbd3f20c2 100644 --- a/src/transformers/models/layoutxlm/processing_layoutxlm.py +++ b/src/transformers/models/layoutxlm/processing_layoutxlm.py @@ -15,6 +15,7 @@ """ Processor class for LayoutXLM. """ + import warnings from typing import List, Optional, Union diff --git a/src/transformers/models/layoutxlm/tokenization_layoutxlm.py b/src/transformers/models/layoutxlm/tokenization_layoutxlm.py index bbfdf44a1e..3ab57ac892 100644 --- a/src/transformers/models/layoutxlm/tokenization_layoutxlm.py +++ b/src/transformers/models/layoutxlm/tokenization_layoutxlm.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License -""" Tokenization classes for LayoutXLM model.""" - +"""Tokenization classes for LayoutXLM model.""" import os from shutil import copyfile diff --git a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py index e899d8b22e..bd6533598d 100644 --- a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py +++ b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License -""" Tokenization classes for LayoutXLM model.""" - +"""Tokenization classes for LayoutXLM model.""" import os from shutil import copyfile diff --git a/src/transformers/models/led/configuration_led.py b/src/transformers/models/led/configuration_led.py index 9933ef2253..9ed3b148c7 100644 --- a/src/transformers/models/led/configuration_led.py +++ b/src/transformers/models/led/configuration_led.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" LED model configuration""" +"""LED model configuration""" from typing import List, Union diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py index 982de403c2..41b6c0a2be 100755 --- a/src/transformers/models/led/modeling_led.py +++ b/src/transformers/models/led/modeling_led.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch LED model.""" - +"""PyTorch LED model.""" import math import warnings diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index f64ed7758d..8c414648d6 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 LED model.""" - +"""TF 2.0 LED model.""" from __future__ import annotations diff --git a/src/transformers/models/levit/configuration_levit.py b/src/transformers/models/levit/configuration_levit.py index 197e5fe5ec..5b04930959 100644 --- a/src/transformers/models/levit/configuration_levit.py +++ b/src/transformers/models/levit/configuration_levit.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" LeViT model configuration""" +"""LeViT model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/levit/convert_levit_timm_to_pytorch.py b/src/transformers/models/levit/convert_levit_timm_to_pytorch.py index 6f285a6de3..afef3f73de 100644 --- a/src/transformers/models/levit/convert_levit_timm_to_pytorch.py +++ b/src/transformers/models/levit/convert_levit_timm_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert LeViT checkpoints from timm.""" - import argparse import json from collections import OrderedDict diff --git a/src/transformers/models/levit/modeling_levit.py b/src/transformers/models/levit/modeling_levit.py index 0de3789c2c..af202787a1 100644 --- a/src/transformers/models/levit/modeling_levit.py +++ b/src/transformers/models/levit/modeling_levit.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch LeViT model.""" +"""PyTorch LeViT model.""" import itertools from dataclasses import dataclass diff --git a/src/transformers/models/lilt/configuration_lilt.py b/src/transformers/models/lilt/configuration_lilt.py index fdfa2192a1..57ab8884ed 100644 --- a/src/transformers/models/lilt/configuration_lilt.py +++ b/src/transformers/models/lilt/configuration_lilt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" LiLT configuration""" +"""LiLT configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/llama/configuration_llama.py b/src/transformers/models/llama/configuration_llama.py index b406b12fc7..699fd0199f 100644 --- a/src/transformers/models/llama/configuration_llama.py +++ b/src/transformers/models/llama/configuration_llama.py @@ -17,7 +17,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" LLaMA model configuration""" +"""LLaMA model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/llama/modeling_flax_llama.py b/src/transformers/models/llama/modeling_flax_llama.py index 1e7fb5d1a1..1c9f1c4adc 100644 --- a/src/transformers/models/llama/modeling_flax_llama.py +++ b/src/transformers/models/llama/modeling_flax_llama.py @@ -18,6 +18,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Flax LLaMA model.""" + from functools import partial from typing import Optional, Tuple diff --git a/src/transformers/models/llama/tokenization_llama.py b/src/transformers/models/llama/tokenization_llama.py index 2b4920c668..5392afb763 100644 --- a/src/transformers/models/llama/tokenization_llama.py +++ b/src/transformers/models/llama/tokenization_llama.py @@ -19,6 +19,7 @@ # limitations under the License. """Tokenization classes for LLaMA.""" + import os from shutil import copyfile from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple diff --git a/src/transformers/models/llava/configuration_llava.py b/src/transformers/models/llava/configuration_llava.py index f146014505..6930dcc78c 100644 --- a/src/transformers/models/llava/configuration_llava.py +++ b/src/transformers/models/llava/configuration_llava.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Llava model configuration""" +"""Llava model configuration""" import warnings diff --git a/src/transformers/models/llava/processing_llava.py b/src/transformers/models/llava/processing_llava.py index ff010f7442..7016cd5009 100644 --- a/src/transformers/models/llava/processing_llava.py +++ b/src/transformers/models/llava/processing_llava.py @@ -16,7 +16,6 @@ Processor class for Llava. """ - from typing import List, Optional, Union from ...feature_extraction_utils import BatchFeature diff --git a/src/transformers/models/llava_next/configuration_llava_next.py b/src/transformers/models/llava_next/configuration_llava_next.py index 21f3d499e3..3111393867 100644 --- a/src/transformers/models/llava_next/configuration_llava_next.py +++ b/src/transformers/models/llava_next/configuration_llava_next.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Llava-NeXT model configuration""" +"""Llava-NeXT model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/longformer/configuration_longformer.py b/src/transformers/models/longformer/configuration_longformer.py index 0cafbf5d69..fc60937637 100644 --- a/src/transformers/models/longformer/configuration_longformer.py +++ b/src/transformers/models/longformer/configuration_longformer.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Longformer configuration""" +"""Longformer configuration""" + from collections import OrderedDict from typing import TYPE_CHECKING, Any, List, Mapping, Optional, Union diff --git a/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py b/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py index ed7d32ab3e..4ef2131228 100644 --- a/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py +++ b/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert RoBERTa checkpoint.""" - import argparse import pytorch_lightning as pl diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index be3ccf761b..b32cde202c 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -14,7 +14,6 @@ # limitations under the License. """Tensorflow Longformer model.""" - from __future__ import annotations import warnings diff --git a/src/transformers/models/longformer/tokenization_longformer_fast.py b/src/transformers/models/longformer/tokenization_longformer_fast.py index 02b74818a2..d4b4228b03 100644 --- a/src/transformers/models/longformer/tokenization_longformer_fast.py +++ b/src/transformers/models/longformer/tokenization_longformer_fast.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Fast Tokenization classes for Longformer.""" + import json from typing import List, Optional, Tuple diff --git a/src/transformers/models/longt5/configuration_longt5.py b/src/transformers/models/longt5/configuration_longt5.py index 839428f27e..0e541ae2a1 100644 --- a/src/transformers/models/longt5/configuration_longt5.py +++ b/src/transformers/models/longt5/configuration_longt5.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" LongT5 model configuration""" +"""LongT5 model configuration""" + from typing import Mapping from ...configuration_utils import PretrainedConfig diff --git a/src/transformers/models/longt5/convert_longt5x_checkpoint_to_flax.py b/src/transformers/models/longt5/convert_longt5x_checkpoint_to_flax.py index 5a1394c719..cf5c2d52d8 100644 --- a/src/transformers/models/longt5/convert_longt5x_checkpoint_to_flax.py +++ b/src/transformers/models/longt5/convert_longt5x_checkpoint_to_flax.py @@ -82,9 +82,9 @@ def convert_t5x_checkpoint_to_flax(t5x_checkpoint_path, config_name, flax_dump_f # Global input layer norm if config.model_type == "longt5" and config.encoder_attention_type == "transient-global": - flax_model_encoder_layer_block["0"][encoder_attn_name]["global_input_layer_norm"][ - "weight" - ] = t5x_global_layer_norm + flax_model_encoder_layer_block["0"][encoder_attn_name]["global_input_layer_norm"]["weight"] = ( + t5x_global_layer_norm + ) if split_mlp_wi: flax_model_encoder_layer_block["1"]["DenseReluDense"]["wi_0"]["kernel"] = t5x_mlp_wi_0 diff --git a/src/transformers/models/longt5/modeling_flax_longt5.py b/src/transformers/models/longt5/modeling_flax_longt5.py index d47f644ba3..4ab18a3ca7 100644 --- a/src/transformers/models/longt5/modeling_flax_longt5.py +++ b/src/transformers/models/longt5/modeling_flax_longt5.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax LongT5 model.""" - +"""Flax LongT5 model.""" import copy from typing import Any, Callable, List, Optional, Tuple diff --git a/src/transformers/models/longt5/modeling_longt5.py b/src/transformers/models/longt5/modeling_longt5.py index 314785b82c..b2a6ed11ca 100644 --- a/src/transformers/models/longt5/modeling_longt5.py +++ b/src/transformers/models/longt5/modeling_longt5.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch LongT5 model.""" - +"""PyTorch LongT5 model.""" import copy import math diff --git a/src/transformers/models/luke/configuration_luke.py b/src/transformers/models/luke/configuration_luke.py index d3f14decac..44e1002cfb 100644 --- a/src/transformers/models/luke/configuration_luke.py +++ b/src/transformers/models/luke/configuration_luke.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" LUKE configuration""" +"""LUKE configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/lxmert/configuration_lxmert.py b/src/transformers/models/lxmert/configuration_lxmert.py index ec3a7c1864..d753e75227 100644 --- a/src/transformers/models/lxmert/configuration_lxmert.py +++ b/src/transformers/models/lxmert/configuration_lxmert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" LXMERT model configuration""" - +"""LXMERT model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py index f8eb86f1d1..1dd77bc36f 100755 --- a/src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert LXMERT checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/lxmert/modeling_lxmert.py b/src/transformers/models/lxmert/modeling_lxmert.py index 6e2ae7d22e..a7f0fea8f4 100644 --- a/src/transformers/models/lxmert/modeling_lxmert.py +++ b/src/transformers/models/lxmert/modeling_lxmert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch LXMERT model.""" - +"""PyTorch LXMERT model.""" import math import os diff --git a/src/transformers/models/lxmert/modeling_tf_lxmert.py b/src/transformers/models/lxmert/modeling_tf_lxmert.py index 2b51fb7510..8a833fb35a 100644 --- a/src/transformers/models/lxmert/modeling_tf_lxmert.py +++ b/src/transformers/models/lxmert/modeling_tf_lxmert.py @@ -14,8 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 LXMERT model.""" - +"""TF 2.0 LXMERT model.""" from __future__ import annotations diff --git a/src/transformers/models/m2m_100/configuration_m2m_100.py b/src/transformers/models/m2m_100/configuration_m2m_100.py index 73840e5132..7ae3c44127 100644 --- a/src/transformers/models/m2m_100/configuration_m2m_100.py +++ b/src/transformers/models/m2m_100/configuration_m2m_100.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" M2M100 model configuration""" +"""M2M100 model configuration""" + from collections import OrderedDict from typing import Any, Mapping, Optional diff --git a/src/transformers/models/m2m_100/tokenization_m2m_100.py b/src/transformers/models/m2m_100/tokenization_m2m_100.py index 96f79ee4e7..403d8cc507 100644 --- a/src/transformers/models/m2m_100/tokenization_m2m_100.py +++ b/src/transformers/models/m2m_100/tokenization_m2m_100.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for M2M100.""" + import json import os from pathlib import Path diff --git a/src/transformers/models/marian/configuration_marian.py b/src/transformers/models/marian/configuration_marian.py index 5921fde981..5a3f083804 100644 --- a/src/transformers/models/marian/configuration_marian.py +++ b/src/transformers/models/marian/configuration_marian.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Marian model configuration""" +"""Marian model configuration""" + from collections import OrderedDict from typing import Any, Mapping, Optional diff --git a/src/transformers/models/marian/modeling_flax_marian.py b/src/transformers/models/marian/modeling_flax_marian.py index 2002d60caa..e33df2e06b 100644 --- a/src/transformers/models/marian/modeling_flax_marian.py +++ b/src/transformers/models/marian/modeling_flax_marian.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax Marian model.""" +"""Flax Marian model.""" import math import random diff --git a/src/transformers/models/marian/modeling_marian.py b/src/transformers/models/marian/modeling_marian.py index 10d7f1b6b2..2045f67354 100755 --- a/src/transformers/models/marian/modeling_marian.py +++ b/src/transformers/models/marian/modeling_marian.py @@ -14,7 +14,6 @@ # limitations under the License. """PyTorch MarianMTModel model, ported from the Marian C++ repo.""" - import copy import math from typing import Dict, List, Optional, Tuple, Union diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index c6d5355f70..30c6157d50 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 Marian model.""" - +"""TF 2.0 Marian model.""" from __future__ import annotations diff --git a/src/transformers/models/markuplm/configuration_markuplm.py b/src/transformers/models/markuplm/configuration_markuplm.py index 581cc0f349..e348a5c5a1 100644 --- a/src/transformers/models/markuplm/configuration_markuplm.py +++ b/src/transformers/models/markuplm/configuration_markuplm.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MarkupLM model configuration""" +"""MarkupLM model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/markuplm/modeling_markuplm.py b/src/transformers/models/markuplm/modeling_markuplm.py index 29f433ae0f..a3aa69621c 100755 --- a/src/transformers/models/markuplm/modeling_markuplm.py +++ b/src/transformers/models/markuplm/modeling_markuplm.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch MarkupLM model.""" +"""PyTorch MarkupLM model.""" import math import os diff --git a/src/transformers/models/markuplm/processing_markuplm.py b/src/transformers/models/markuplm/processing_markuplm.py index 81aaca9e5c..757c146c58 100644 --- a/src/transformers/models/markuplm/processing_markuplm.py +++ b/src/transformers/models/markuplm/processing_markuplm.py @@ -15,6 +15,7 @@ """ Processor class for MarkupLM. 
""" + from typing import Optional, Union from ...file_utils import TensorType diff --git a/src/transformers/models/mask2former/configuration_mask2former.py b/src/transformers/models/mask2former/configuration_mask2former.py index ed97ed2458..0a49127a52 100644 --- a/src/transformers/models/mask2former/configuration_mask2former.py +++ b/src/transformers/models/mask2former/configuration_mask2former.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Mask2Former model configuration""" +"""Mask2Former model configuration""" + from typing import Dict, List, Optional from ...configuration_utils import PretrainedConfig diff --git a/src/transformers/models/mask2former/modeling_mask2former.py b/src/transformers/models/mask2former/modeling_mask2former.py index f37b5b14fc..faaca46ed2 100644 --- a/src/transformers/models/mask2former/modeling_mask2former.py +++ b/src/transformers/models/mask2former/modeling_mask2former.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Mask2Former model.""" +"""PyTorch Mask2Former model.""" import math import warnings diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index f82fe199b7..149f3cb52f 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MaskFormer model configuration""" +"""MaskFormer model configuration""" + from typing import Dict, Optional from ...configuration_utils import PretrainedConfig diff --git a/src/transformers/models/maskformer/configuration_maskformer_swin.py b/src/transformers/models/maskformer/configuration_maskformer_swin.py index 56d8f746db..1cc2feffbf 100644 --- a/src/transformers/models/maskformer/configuration_maskformer_swin.py +++ b/src/transformers/models/maskformer/configuration_maskformer_swin.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MaskFormer Swin Transformer model configuration""" +"""MaskFormer Swin Transformer model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/maskformer/convert_maskformer_resnet_to_pytorch.py b/src/transformers/models/maskformer/convert_maskformer_resnet_to_pytorch.py index fec508de41..873498fa00 100644 --- a/src/transformers/models/maskformer/convert_maskformer_resnet_to_pytorch.py +++ b/src/transformers/models/maskformer/convert_maskformer_resnet_to_pytorch.py @@ -15,7 +15,6 @@ """Convert MaskFormer checkpoints with ResNet backbone from the original repository. 
URL: https://github.com/facebookresearch/MaskFormer""" - import argparse import json import pickle diff --git a/src/transformers/models/maskformer/convert_maskformer_swin_to_pytorch.py b/src/transformers/models/maskformer/convert_maskformer_swin_to_pytorch.py index 8f0d0e99df..8f8441ab8f 100644 --- a/src/transformers/models/maskformer/convert_maskformer_swin_to_pytorch.py +++ b/src/transformers/models/maskformer/convert_maskformer_swin_to_pytorch.py @@ -15,7 +15,6 @@ """Convert MaskFormer checkpoints with Swin backbone from the original repository. URL: https://github.com/facebookresearch/MaskFormer""" - import argparse import json import pickle diff --git a/src/transformers/models/maskformer/modeling_maskformer.py b/src/transformers/models/maskformer/modeling_maskformer.py index 2c2746603a..271ad5cc07 100644 --- a/src/transformers/models/maskformer/modeling_maskformer.py +++ b/src/transformers/models/maskformer/modeling_maskformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch MaskFormer model.""" +"""PyTorch MaskFormer model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/mbart/configuration_mbart.py b/src/transformers/models/mbart/configuration_mbart.py index 4823047dcf..8a4fe14b6c 100644 --- a/src/transformers/models/mbart/configuration_mbart.py +++ b/src/transformers/models/mbart/configuration_mbart.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MBART model configuration""" +"""MBART model configuration""" + from collections import OrderedDict from typing import Any, Mapping, Optional diff --git a/src/transformers/models/mbart/modeling_flax_mbart.py b/src/transformers/models/mbart/modeling_flax_mbart.py index 907fd53aa1..0f943df13c 100644 --- a/src/transformers/models/mbart/modeling_flax_mbart.py +++ b/src/transformers/models/mbart/modeling_flax_mbart.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax MBart model.""" +"""Flax MBart model.""" import math import random diff --git a/src/transformers/models/mbart/modeling_mbart.py b/src/transformers/models/mbart/modeling_mbart.py index 34eab5b8f3..a7f7be3a85 100755 --- a/src/transformers/models/mbart/modeling_mbart.py +++ b/src/transformers/models/mbart/modeling_mbart.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch MBART model.""" +"""PyTorch MBART model.""" + import copy import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index 2c134b520d..8c9bb98120 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" TF 2.0 MBart model.""" - +"""TF 2.0 MBart model.""" from __future__ import annotations diff --git a/src/transformers/models/mega/configuration_mega.py b/src/transformers/models/mega/configuration_mega.py index 8287a3938e..b090a020af 100644 --- a/src/transformers/models/mega/configuration_mega.py +++ b/src/transformers/models/mega/configuration_mega.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MEGA configuration""" +"""MEGA configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py index 2fe75ba273..1f791dab24 100644 --- a/src/transformers/models/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py @@ -24,6 +24,7 @@ Requirements: - clone the pretrained weights for the original implementation from the hugging face repo * use this location as the path for pretrained weights """ + import argparse # utilities to import the model weights and config file diff --git a/src/transformers/models/megatron_bert/configuration_megatron_bert.py b/src/transformers/models/megatron_bert/configuration_megatron_bert.py index d3be4db99b..a0e216a535 100644 --- a/src/transformers/models/megatron_bert/configuration_megatron_bert.py +++ b/src/transformers/models/megatron_bert/configuration_megatron_bert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MEGATRON_BERT model configuration""" +"""MEGATRON_BERT model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/megatron_bert/modeling_megatron_bert.py b/src/transformers/models/megatron_bert/modeling_megatron_bert.py index 270caf9856..ff0f536396 100755 --- a/src/transformers/models/megatron_bert/modeling_megatron_bert.py +++ b/src/transformers/models/megatron_bert/modeling_megatron_bert.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch MegatronBERT model.""" - +"""PyTorch MegatronBERT model.""" import math import os diff --git a/src/transformers/models/mgp_str/configuration_mgp_str.py b/src/transformers/models/mgp_str/configuration_mgp_str.py index 2ce4ffd0c6..d7850342dc 100644 --- a/src/transformers/models/mgp_str/configuration_mgp_str.py +++ b/src/transformers/models/mgp_str/configuration_mgp_str.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" MGP-STR model configuration""" +"""MGP-STR model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/mgp_str/modeling_mgp_str.py b/src/transformers/models/mgp_str/modeling_mgp_str.py index 6754cebcae..6b18c45e01 100644 --- a/src/transformers/models/mgp_str/modeling_mgp_str.py +++ b/src/transformers/models/mgp_str/modeling_mgp_str.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch MGP-STR model.""" +"""PyTorch MGP-STR model.""" import collections.abc from dataclasses import dataclass diff --git a/src/transformers/models/mistral/configuration_mistral.py b/src/transformers/models/mistral/configuration_mistral.py index e281802792..5f3e706339 100644 --- a/src/transformers/models/mistral/configuration_mistral.py +++ b/src/transformers/models/mistral/configuration_mistral.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Mistral model configuration""" +"""Mistral model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/mistral/modeling_flax_mistral.py b/src/transformers/models/mistral/modeling_flax_mistral.py index 3480fc7214..3bff2a6281 100644 --- a/src/transformers/models/mistral/modeling_flax_mistral.py +++ b/src/transformers/models/mistral/modeling_flax_mistral.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax Mistral model.""" +"""Flax Mistral model.""" + from typing import Optional, Tuple import flax.linen as nn diff --git a/src/transformers/models/mixtral/configuration_mixtral.py b/src/transformers/models/mixtral/configuration_mixtral.py index 5304afd513..d40f516e58 100644 --- a/src/transformers/models/mixtral/configuration_mixtral.py +++ b/src/transformers/models/mixtral/configuration_mixtral.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Mixtral model configuration""" +"""Mixtral model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/mixtral/modeling_mixtral.py b/src/transformers/models/mixtral/modeling_mixtral.py index 70e1746392..4c694de0c3 100644 --- a/src/transformers/models/mixtral/modeling_mixtral.py +++ b/src/transformers/models/mixtral/modeling_mixtral.py @@ -17,7 +17,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch Mixtral model.""" +"""PyTorch Mixtral model.""" + import inspect import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/mluke/tokenization_mluke.py b/src/transformers/models/mluke/tokenization_mluke.py index 3ef5e64ed2..004f6526f5 100644 --- a/src/transformers/models/mluke/tokenization_mluke.py +++ b/src/transformers/models/mluke/tokenization_mluke.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License -""" Tokenization classes for mLUKE.""" - +"""Tokenization classes for mLUKE.""" import itertools import json diff --git a/src/transformers/models/mobilebert/configuration_mobilebert.py b/src/transformers/models/mobilebert/configuration_mobilebert.py index 197e29fcfe..2370fa9b57 100644 --- a/src/transformers/models/mobilebert/configuration_mobilebert.py +++ b/src/transformers/models/mobilebert/configuration_mobilebert.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MobileBERT model configuration""" +"""MobileBERT model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py index bab2cbac8e..d73c276b4f 100644 --- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 MobileBERT model.""" - +"""TF 2.0 MobileBERT model.""" from __future__ import annotations diff --git a/src/transformers/models/mobilebert/tokenization_mobilebert.py b/src/transformers/models/mobilebert/tokenization_mobilebert.py index ccfdcc31ff..32dc995668 100644 --- a/src/transformers/models/mobilebert/tokenization_mobilebert.py +++ b/src/transformers/models/mobilebert/tokenization_mobilebert.py @@ -15,7 +15,6 @@ # limitations under the License. """Tokenization classes for MobileBERT.""" - import collections import os import unicodedata diff --git a/src/transformers/models/mobilenet_v1/configuration_mobilenet_v1.py b/src/transformers/models/mobilenet_v1/configuration_mobilenet_v1.py index 70075bcc94..2bf204a66d 100644 --- a/src/transformers/models/mobilenet_v1/configuration_mobilenet_v1.py +++ b/src/transformers/models/mobilenet_v1/configuration_mobilenet_v1.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MobileNetV1 model configuration""" +"""MobileNetV1 model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/mobilenet_v1/convert_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/mobilenet_v1/convert_original_tf_checkpoint_to_pytorch.py index 4985e0ff22..1b53bbeab4 100644 --- a/src/transformers/models/mobilenet_v1/convert_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/mobilenet_v1/convert_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Convert MobileNetV1 checkpoints from the tensorflow/models library.""" - import argparse import json import re diff --git a/src/transformers/models/mobilenet_v1/modeling_mobilenet_v1.py b/src/transformers/models/mobilenet_v1/modeling_mobilenet_v1.py index 85a398f898..00f8c501b2 100755 --- a/src/transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +++ b/src/transformers/models/mobilenet_v1/modeling_mobilenet_v1.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch MobileNetV1 model.""" - +"""PyTorch MobileNetV1 model.""" from typing import Optional, Union diff --git a/src/transformers/models/mobilenet_v2/configuration_mobilenet_v2.py b/src/transformers/models/mobilenet_v2/configuration_mobilenet_v2.py index 81e590d5a3..25bcfa5785 100644 --- a/src/transformers/models/mobilenet_v2/configuration_mobilenet_v2.py +++ b/src/transformers/models/mobilenet_v2/configuration_mobilenet_v2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MobileNetV2 model configuration""" +"""MobileNetV2 model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/mobilenet_v2/convert_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/mobilenet_v2/convert_original_tf_checkpoint_to_pytorch.py index 443bf8fd7e..1fdb9783cc 100644 --- a/src/transformers/models/mobilenet_v2/convert_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/mobilenet_v2/convert_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert MobileNetV2 checkpoints from the tensorflow/models library.""" - import argparse import json import re diff --git a/src/transformers/models/mobilenet_v2/modeling_mobilenet_v2.py b/src/transformers/models/mobilenet_v2/modeling_mobilenet_v2.py index 3c8fd5193a..7eb231380e 100755 --- a/src/transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +++ b/src/transformers/models/mobilenet_v2/modeling_mobilenet_v2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch MobileNetV2 model.""" - +"""PyTorch MobileNetV2 model.""" from typing import Optional, Union @@ -134,29 +133,29 @@ def _build_tf_to_pytorch_map(model, config, tf_weights=None): tf_to_pt_map[prefix + "BatchNorm/beta"] = model.segmentation_head.conv_pool.normalization.bias tf_to_pt_map[prefix + "BatchNorm/gamma"] = model.segmentation_head.conv_pool.normalization.weight tf_to_pt_map[prefix + "BatchNorm/moving_mean"] = model.segmentation_head.conv_pool.normalization.running_mean - tf_to_pt_map[ - prefix + "BatchNorm/moving_variance" - ] = model.segmentation_head.conv_pool.normalization.running_var + tf_to_pt_map[prefix + "BatchNorm/moving_variance"] = ( + model.segmentation_head.conv_pool.normalization.running_var + ) prefix = "aspp0/" tf_to_pt_map[prefix + "weights"] = model.segmentation_head.conv_aspp.convolution.weight tf_to_pt_map[prefix + "BatchNorm/beta"] = model.segmentation_head.conv_aspp.normalization.bias tf_to_pt_map[prefix + "BatchNorm/gamma"] = model.segmentation_head.conv_aspp.normalization.weight tf_to_pt_map[prefix + "BatchNorm/moving_mean"] = model.segmentation_head.conv_aspp.normalization.running_mean - tf_to_pt_map[ - prefix + "BatchNorm/moving_variance" - ] = model.segmentation_head.conv_aspp.normalization.running_var + tf_to_pt_map[prefix + "BatchNorm/moving_variance"] = ( + model.segmentation_head.conv_aspp.normalization.running_var + ) prefix = "concat_projection/" tf_to_pt_map[prefix + "weights"] = model.segmentation_head.conv_projection.convolution.weight tf_to_pt_map[prefix + "BatchNorm/beta"] = model.segmentation_head.conv_projection.normalization.bias tf_to_pt_map[prefix + "BatchNorm/gamma"] = model.segmentation_head.conv_projection.normalization.weight - tf_to_pt_map[ - prefix + "BatchNorm/moving_mean" - ] = model.segmentation_head.conv_projection.normalization.running_mean - tf_to_pt_map[ - prefix + "BatchNorm/moving_variance" - ] = model.segmentation_head.conv_projection.normalization.running_var + tf_to_pt_map[prefix + "BatchNorm/moving_mean"] = ( + model.segmentation_head.conv_projection.normalization.running_mean + ) + tf_to_pt_map[prefix + "BatchNorm/moving_variance"] = ( + model.segmentation_head.conv_projection.normalization.running_var + ) prefix = "logits/semantic/" tf_to_pt_map[ema(prefix + "weights")] = model.segmentation_head.classifier.convolution.weight diff --git a/src/transformers/models/mobilevit/configuration_mobilevit.py b/src/transformers/models/mobilevit/configuration_mobilevit.py index 5650002b3c..500f8b23db 100644 --- a/src/transformers/models/mobilevit/configuration_mobilevit.py +++ b/src/transformers/models/mobilevit/configuration_mobilevit.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MobileViT model configuration""" +"""MobileViT model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py b/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py index e251b124b4..522d6671d1 100644 --- a/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py +++ b/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Convert MobileViT checkpoints from the ml-cvnets library.""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/mobilevit/modeling_mobilevit.py b/src/transformers/models/mobilevit/modeling_mobilevit.py index 7e23d1b5ca..1931e975c8 100755 --- a/src/transformers/models/mobilevit/modeling_mobilevit.py +++ b/src/transformers/models/mobilevit/modeling_mobilevit.py @@ -14,8 +14,7 @@ # limitations under the License. # # Original license: https://github.com/apple/ml-cvnets/blob/main/LICENSE -""" PyTorch MobileViT model.""" - +"""PyTorch MobileViT model.""" import math from typing import Dict, Optional, Set, Tuple, Union diff --git a/src/transformers/models/mobilevit/modeling_tf_mobilevit.py b/src/transformers/models/mobilevit/modeling_tf_mobilevit.py index 179f209e87..01bd35b7ab 100644 --- a/src/transformers/models/mobilevit/modeling_tf_mobilevit.py +++ b/src/transformers/models/mobilevit/modeling_tf_mobilevit.py @@ -14,7 +14,7 @@ # limitations under the License. # # Original license: https://github.com/apple/ml-cvnets/blob/main/LICENSE -""" TensorFlow 2.0 MobileViT model.""" +"""TensorFlow 2.0 MobileViT model.""" from __future__ import annotations diff --git a/src/transformers/models/mobilevitv2/configuration_mobilevitv2.py b/src/transformers/models/mobilevitv2/configuration_mobilevitv2.py index 957a43f770..65260d6501 100644 --- a/src/transformers/models/mobilevitv2/configuration_mobilevitv2.py +++ b/src/transformers/models/mobilevitv2/configuration_mobilevitv2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MobileViTV2 model configuration""" +"""MobileViTV2 model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py b/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py index 2e2d31295d..518dc949a4 100644 --- a/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py +++ b/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert MobileViTV2 checkpoints from the ml-cvnets library.""" - import argparse import collections import json diff --git a/src/transformers/models/mobilevitv2/modeling_mobilevitv2.py b/src/transformers/models/mobilevitv2/modeling_mobilevitv2.py index 3d6c4c1b39..3db6582bcf 100644 --- a/src/transformers/models/mobilevitv2/modeling_mobilevitv2.py +++ b/src/transformers/models/mobilevitv2/modeling_mobilevitv2.py @@ -14,8 +14,7 @@ # limitations under the License. # # Original license: https://github.com/apple/ml-cvnets/blob/main/LICENSE -""" PyTorch MobileViTV2 model.""" - +"""PyTorch MobileViTV2 model.""" from typing import Optional, Tuple, Union diff --git a/src/transformers/models/mpnet/configuration_mpnet.py b/src/transformers/models/mpnet/configuration_mpnet.py index 9c53e45d98..0abb89c942 100644 --- a/src/transformers/models/mpnet/configuration_mpnet.py +++ b/src/transformers/models/mpnet/configuration_mpnet.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" MPNet model configuration""" +"""MPNet model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/mpnet/modeling_mpnet.py b/src/transformers/models/mpnet/modeling_mpnet.py index bc811fd22f..11a27f5577 100644 --- a/src/transformers/models/mpnet/modeling_mpnet.py +++ b/src/transformers/models/mpnet/modeling_mpnet.py @@ -15,7 +15,6 @@ # limitations under the License. """PyTorch MPNet model.""" - import math from typing import Optional, Tuple, Union diff --git a/src/transformers/models/mpnet/modeling_tf_mpnet.py b/src/transformers/models/mpnet/modeling_tf_mpnet.py index f0afba869b..d1864bd197 100644 --- a/src/transformers/models/mpnet/modeling_tf_mpnet.py +++ b/src/transformers/models/mpnet/modeling_tf_mpnet.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 MPNet model.""" - +"""TF 2.0 MPNet model.""" from __future__ import annotations diff --git a/src/transformers/models/mpt/configuration_mpt.py b/src/transformers/models/mpt/configuration_mpt.py index 5d18b1419e..ed822c813b 100644 --- a/src/transformers/models/mpt/configuration_mpt.py +++ b/src/transformers/models/mpt/configuration_mpt.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Mpt configuration""" +"""Mpt configuration""" + from typing import TYPE_CHECKING, Optional, Union diff --git a/src/transformers/models/mra/configuration_mra.py b/src/transformers/models/mra/configuration_mra.py index 30c38795b5..6837de4f80 100644 --- a/src/transformers/models/mra/configuration_mra.py +++ b/src/transformers/models/mra/configuration_mra.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MRA model configuration""" +"""MRA model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/mra/modeling_mra.py b/src/transformers/models/mra/modeling_mra.py index b47a41d73d..09b2136593 100644 --- a/src/transformers/models/mra/modeling_mra.py +++ b/src/transformers/models/mra/modeling_mra.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch MRA model.""" - +"""PyTorch MRA model.""" import math from pathlib import Path diff --git a/src/transformers/models/mt5/configuration_mt5.py b/src/transformers/models/mt5/configuration_mt5.py index 2d31a52563..ef629718b1 100644 --- a/src/transformers/models/mt5/configuration_mt5.py +++ b/src/transformers/models/mt5/configuration_mt5.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" mT5 model configuration""" +"""mT5 model configuration""" + from typing import Mapping from ...configuration_utils import PretrainedConfig diff --git a/src/transformers/models/mt5/modeling_flax_mt5.py b/src/transformers/models/mt5/modeling_flax_mt5.py index 98406439df..fbb5b107f5 100644 --- a/src/transformers/models/mt5/modeling_flax_mt5.py +++ b/src/transformers/models/mt5/modeling_flax_mt5.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax mT5 model.""" +"""Flax mT5 model.""" import jax.numpy as jnp diff --git a/src/transformers/models/mt5/modeling_mt5.py b/src/transformers/models/mt5/modeling_mt5.py index 84a9f78ca9..1336b91961 100644 --- a/src/transformers/models/mt5/modeling_mt5.py +++ b/src/transformers/models/mt5/modeling_mt5.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch mT5 model.""" +"""PyTorch mT5 model.""" import copy import math diff --git a/src/transformers/models/mt5/modeling_tf_mt5.py b/src/transformers/models/mt5/modeling_tf_mt5.py index f8350eb197..7270a54948 100644 --- a/src/transformers/models/mt5/modeling_tf_mt5.py +++ b/src/transformers/models/mt5/modeling_tf_mt5.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tensorflow mT5 model.""" +"""Tensorflow mT5 model.""" from ...utils import logging from ..t5.modeling_tf_t5 import TFT5EncoderModel, TFT5ForConditionalGeneration, TFT5Model diff --git a/src/transformers/models/musicgen/configuration_musicgen.py b/src/transformers/models/musicgen/configuration_musicgen.py index 58be655f38..ef2e0244c1 100644 --- a/src/transformers/models/musicgen/configuration_musicgen.py +++ b/src/transformers/models/musicgen/configuration_musicgen.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MusicGen model configuration""" +"""MusicGen model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/musicgen/convert_musicgen_transformers.py b/src/transformers/models/musicgen/convert_musicgen_transformers.py index a072ec321b..f4afd24df0 100644 --- a/src/transformers/models/musicgen/convert_musicgen_transformers.py +++ b/src/transformers/models/musicgen/convert_musicgen_transformers.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Convert MusicGen checkpoints from the original repository.""" + import argparse from pathlib import Path from typing import Dict, OrderedDict, Tuple diff --git a/src/transformers/models/musicgen/modeling_musicgen.py b/src/transformers/models/musicgen/modeling_musicgen.py index d6f0ae96f4..810f34f780 100644 --- a/src/transformers/models/musicgen/modeling_musicgen.py +++ b/src/transformers/models/musicgen/modeling_musicgen.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch Musicgen model.""" +"""PyTorch Musicgen model.""" + import copy import inspect import math diff --git a/src/transformers/models/musicgen/processing_musicgen.py b/src/transformers/models/musicgen/processing_musicgen.py index 847c542a60..c153c5dfe1 100644 --- a/src/transformers/models/musicgen/processing_musicgen.py +++ b/src/transformers/models/musicgen/processing_musicgen.py @@ -15,6 +15,7 @@ """ Text/audio processor class for MusicGen """ + from typing import List, Optional import numpy as np diff --git a/src/transformers/models/musicgen_melody/configuration_musicgen_melody.py b/src/transformers/models/musicgen_melody/configuration_musicgen_melody.py index 050bbbc2b6..b29187facb 100644 --- a/src/transformers/models/musicgen_melody/configuration_musicgen_melody.py +++ b/src/transformers/models/musicgen_melody/configuration_musicgen_melody.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Musicgen Melody model configuration""" +"""Musicgen Melody model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/musicgen_melody/convert_musicgen_melody_transformers.py b/src/transformers/models/musicgen_melody/convert_musicgen_melody_transformers.py index 9e224d93f1..52980f73ec 100644 --- a/src/transformers/models/musicgen_melody/convert_musicgen_melody_transformers.py +++ b/src/transformers/models/musicgen_melody/convert_musicgen_melody_transformers.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Convert Musicgen Melody checkpoints from the original repository.""" + import argparse from pathlib import Path from typing import Dict, OrderedDict, Tuple diff --git a/src/transformers/models/musicgen_melody/feature_extraction_musicgen_melody.py b/src/transformers/models/musicgen_melody/feature_extraction_musicgen_melody.py index 2013309da5..ac83f3ac8d 100644 --- a/src/transformers/models/musicgen_melody/feature_extraction_musicgen_melody.py +++ b/src/transformers/models/musicgen_melody/feature_extraction_musicgen_melody.py @@ -15,6 +15,7 @@ """ Feature extractor class for Musicgen Melody """ + import copy from typing import Any, Dict, List, Optional, Union diff --git a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py index 6458df0a1b..119628d50d 100644 --- a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py +++ b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch Musicgen Melody model.""" +"""PyTorch Musicgen Melody model.""" + import copy import inspect import math diff --git a/src/transformers/models/musicgen_melody/processing_musicgen_melody.py b/src/transformers/models/musicgen_melody/processing_musicgen_melody.py index a474be38b4..34b1d1ec4d 100644 --- a/src/transformers/models/musicgen_melody/processing_musicgen_melody.py +++ b/src/transformers/models/musicgen_melody/processing_musicgen_melody.py @@ -15,6 +15,7 @@ """ Text/audio processor class for MusicGen Melody """ + from typing import List, Optional import numpy as np diff --git a/src/transformers/models/mvp/configuration_mvp.py b/src/transformers/models/mvp/configuration_mvp.py index 00f6b14249..8e2317982b 100644 --- a/src/transformers/models/mvp/configuration_mvp.py +++ b/src/transformers/models/mvp/configuration_mvp.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" MVP model configuration""" +"""MVP model configuration""" + import warnings from ...configuration_utils import PretrainedConfig diff --git a/src/transformers/models/mvp/modeling_mvp.py b/src/transformers/models/mvp/modeling_mvp.py index 7c0f478567..319f1760ce 100644 --- a/src/transformers/models/mvp/modeling_mvp.py +++ b/src/transformers/models/mvp/modeling_mvp.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch MVP model.""" +"""PyTorch MVP model.""" + import copy import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/nat/configuration_nat.py b/src/transformers/models/nat/configuration_nat.py index baf0ea13a5..b20a60ac1e 100644 --- a/src/transformers/models/nat/configuration_nat.py +++ b/src/transformers/models/nat/configuration_nat.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Neighborhood Attention Transformer model configuration""" +"""Neighborhood Attention Transformer model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/nat/modeling_nat.py b/src/transformers/models/nat/modeling_nat.py index b9c332c894..fa51801009 100644 --- a/src/transformers/models/nat/modeling_nat.py +++ b/src/transformers/models/nat/modeling_nat.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Neighborhood Attention Transformer model.""" - +"""PyTorch Neighborhood Attention Transformer model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/nezha/modeling_nezha.py b/src/transformers/models/nezha/modeling_nezha.py index e10ba41696..30c8b6d890 100644 --- a/src/transformers/models/nezha/modeling_nezha.py +++ b/src/transformers/models/nezha/modeling_nezha.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""PyTorch Nezha model.""" - import math import os import warnings diff --git a/src/transformers/models/nllb_moe/configuration_nllb_moe.py b/src/transformers/models/nllb_moe/configuration_nllb_moe.py index 98c8397c18..ef12c199ef 100644 --- a/src/transformers/models/nllb_moe/configuration_nllb_moe.py +++ b/src/transformers/models/nllb_moe/configuration_nllb_moe.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" NLLB-MoE model configuration""" +"""NLLB-MoE model configuration""" + from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/nllb_moe/modeling_nllb_moe.py b/src/transformers/models/nllb_moe/modeling_nllb_moe.py index 1f39dfd39b..2bec0fb84d 100644 --- a/src/transformers/models/nllb_moe/modeling_nllb_moe.py +++ b/src/transformers/models/nllb_moe/modeling_nllb_moe.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch NLLB-MoE model.""" - +"""PyTorch NLLB-MoE model.""" import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/nougat/convert_nougat_to_hf.py b/src/transformers/models/nougat/convert_nougat_to_hf.py index ecc74fdb5f..e42f8553ac 100644 --- a/src/transformers/models/nougat/convert_nougat_to_hf.py +++ b/src/transformers/models/nougat/convert_nougat_to_hf.py @@ -113,22 +113,22 @@ def convert_state_dict(orig_state_dict, model): orig_state_dict[ f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.weight" ] = val[:dim, :] - orig_state_dict[ - f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.weight" - ] = val[dim : dim * 2, :] + orig_state_dict[f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.weight"] = ( + val[dim : dim * 2, :] + ) orig_state_dict[ f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.weight" ] = val[-dim:, :] else: - orig_state_dict[ - f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.bias" - ] = val[:dim] - orig_state_dict[ - f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.bias" - ] = val[dim : dim * 2] - orig_state_dict[ - f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.bias" - ] = val[-dim:] + orig_state_dict[f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.bias"] = ( + val[:dim] + ) + orig_state_dict[f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.bias"] = ( + val[dim : dim * 2] + ) + orig_state_dict[f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.bias"] = ( + val[-dim:] + ) elif "attn_mask" in key or key in ["encoder.model.norm.weight", "encoder.model.norm.bias"]: # HuggingFace implementation doesn't use attn_mask buffer # and model doesn't use final LayerNorms for the encoder diff --git a/src/transformers/models/nougat/tokenization_nougat_fast.py b/src/transformers/models/nougat/tokenization_nougat_fast.py index ef6b613bba..0a7eec4ad9 100644 --- a/src/transformers/models/nougat/tokenization_nougat_fast.py +++ b/src/transformers/models/nougat/tokenization_nougat_fast.py @@ -15,6 +15,7 @@ """ Fast tokenizer class for Nougat. 
""" + import re from functools import partial from multiprocessing import Pool diff --git a/src/transformers/models/nystromformer/configuration_nystromformer.py b/src/transformers/models/nystromformer/configuration_nystromformer.py index ca277e266d..e52b02d9f8 100644 --- a/src/transformers/models/nystromformer/configuration_nystromformer.py +++ b/src/transformers/models/nystromformer/configuration_nystromformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Nystromformer model configuration""" +"""Nystromformer model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/nystromformer/modeling_nystromformer.py b/src/transformers/models/nystromformer/modeling_nystromformer.py index cb027c314e..4bb4c33fff 100755 --- a/src/transformers/models/nystromformer/modeling_nystromformer.py +++ b/src/transformers/models/nystromformer/modeling_nystromformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Nystromformer model.""" - +"""PyTorch Nystromformer model.""" import math from typing import Optional, Tuple, Union diff --git a/src/transformers/models/olmo/configuration_olmo.py b/src/transformers/models/olmo/configuration_olmo.py index 56cd01f7f2..440dc2ee9d 100644 --- a/src/transformers/models/olmo/configuration_olmo.py +++ b/src/transformers/models/olmo/configuration_olmo.py @@ -17,7 +17,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" OLMo model configuration""" +"""OLMo model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/oneformer/configuration_oneformer.py b/src/transformers/models/oneformer/configuration_oneformer.py index f3c01191d9..57bff716f4 100644 --- a/src/transformers/models/oneformer/configuration_oneformer.py +++ b/src/transformers/models/oneformer/configuration_oneformer.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """OneFormer model configuration""" + from typing import Dict, Optional from ...configuration_utils import PretrainedConfig diff --git a/src/transformers/models/oneformer/modeling_oneformer.py b/src/transformers/models/oneformer/modeling_oneformer.py index fff665b2ff..9c2f662207 100644 --- a/src/transformers/models/oneformer/modeling_oneformer.py +++ b/src/transformers/models/oneformer/modeling_oneformer.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch OneFormer model.""" +"""PyTorch OneFormer model.""" + import copy import math import warnings diff --git a/src/transformers/models/openai/configuration_openai.py b/src/transformers/models/openai/configuration_openai.py index 64411455ee..dde668b32f 100644 --- a/src/transformers/models/openai/configuration_openai.py +++ b/src/transformers/models/openai/configuration_openai.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" OpenAI GPT configuration""" +"""OpenAI GPT configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py index 1b101aea0c..3d5218c204 100755 --- a/src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert OpenAI GPT checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/openai/modeling_openai.py b/src/transformers/models/openai/modeling_openai.py index 1c754daa0e..07b6f9a637 100644 --- a/src/transformers/models/openai/modeling_openai.py +++ b/src/transformers/models/openai/modeling_openai.py @@ -15,7 +15,6 @@ # limitations under the License. """PyTorch OpenAI GPT model.""" - import json import math import os diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index e2d0ae885c..20f5581c95 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 OpenAI GPT model.""" +"""TF 2.0 OpenAI GPT model.""" from __future__ import annotations diff --git a/src/transformers/models/openai/tokenization_openai.py b/src/transformers/models/openai/tokenization_openai.py index 4f2b279160..d7427aa429 100644 --- a/src/transformers/models/openai/tokenization_openai.py +++ b/src/transformers/models/openai/tokenization_openai.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for OpenAI GPT.""" - import json import os import re diff --git a/src/transformers/models/openai/tokenization_openai_fast.py b/src/transformers/models/openai/tokenization_openai_fast.py index 214db53850..41f4c8db90 100644 --- a/src/transformers/models/openai/tokenization_openai_fast.py +++ b/src/transformers/models/openai/tokenization_openai_fast.py @@ -14,7 +14,6 @@ # limitations under the License. """Fast Tokenization classes for OpenAI GPT.""" - from typing import Optional, Tuple from ...tokenization_utils_fast import PreTrainedTokenizerFast diff --git a/src/transformers/models/opt/configuration_opt.py b/src/transformers/models/opt/configuration_opt.py index a9802d2ef3..455a6362a7 100644 --- a/src/transformers/models/opt/configuration_opt.py +++ b/src/transformers/models/opt/configuration_opt.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" OPT model configuration""" +"""OPT model configuration""" + from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py index 3f302b2ec3..486b477f97 100644 --- a/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert OPT checkpoint.""" - import argparse from pathlib import Path diff --git a/src/transformers/models/opt/modeling_flax_opt.py b/src/transformers/models/opt/modeling_flax_opt.py index 5d9839f120..c6296e4eea 100644 --- a/src/transformers/models/opt/modeling_flax_opt.py +++ b/src/transformers/models/opt/modeling_flax_opt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax OPT model.""" +"""Flax OPT model.""" from functools import partial from typing import Optional, Tuple diff --git a/src/transformers/models/opt/modeling_opt.py b/src/transformers/models/opt/modeling_opt.py index f93c3866ae..87695314b0 100644 --- a/src/transformers/models/opt/modeling_opt.py +++ b/src/transformers/models/opt/modeling_opt.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch OPT model.""" +"""PyTorch OPT model.""" + from typing import List, Optional, Tuple, Union import torch diff --git a/src/transformers/models/opt/modeling_tf_opt.py b/src/transformers/models/opt/modeling_tf_opt.py index 8dbad97e08..9c5dfa4ade 100644 --- a/src/transformers/models/opt/modeling_tf_opt.py +++ b/src/transformers/models/opt/modeling_tf_opt.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 OPT model.""" - +"""TF 2.0 OPT model.""" from __future__ import annotations diff --git a/src/transformers/models/owlv2/configuration_owlv2.py b/src/transformers/models/owlv2/configuration_owlv2.py index 4b09166b70..72d52a533d 100644 --- a/src/transformers/models/owlv2/configuration_owlv2.py +++ b/src/transformers/models/owlv2/configuration_owlv2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" OWLv2 model configuration""" +"""OWLv2 model configuration""" import os from typing import TYPE_CHECKING, Dict, Union diff --git a/src/transformers/models/owlv2/modeling_owlv2.py b/src/transformers/models/owlv2/modeling_owlv2.py index c98846ea69..a7924085fc 100644 --- a/src/transformers/models/owlv2/modeling_owlv2.py +++ b/src/transformers/models/owlv2/modeling_owlv2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch OWLv2 model.""" +"""PyTorch OWLv2 model.""" from dataclasses import dataclass from functools import lru_cache diff --git a/src/transformers/models/owlvit/configuration_owlvit.py b/src/transformers/models/owlvit/configuration_owlvit.py index 747f1c3ccb..2cbb061222 100644 --- a/src/transformers/models/owlvit/configuration_owlvit.py +++ b/src/transformers/models/owlvit/configuration_owlvit.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" OWL-ViT model configuration""" +"""OWL-ViT model configuration""" import os from collections import OrderedDict diff --git a/src/transformers/models/owlvit/modeling_owlvit.py b/src/transformers/models/owlvit/modeling_owlvit.py index c0676f519e..a7d8445523 100644 --- a/src/transformers/models/owlvit/modeling_owlvit.py +++ b/src/transformers/models/owlvit/modeling_owlvit.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch OWL-ViT model.""" +"""PyTorch OWL-ViT model.""" from dataclasses import dataclass from functools import lru_cache diff --git a/src/transformers/models/paligemma/configuration_paligemma.py b/src/transformers/models/paligemma/configuration_paligemma.py index 7252425b85..d092142476 100644 --- a/src/transformers/models/paligemma/configuration_paligemma.py +++ b/src/transformers/models/paligemma/configuration_paligemma.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PaliGemmamodel configuration""" +"""PaliGemmamodel configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/paligemma/convert_paligemma_weights_to_hf.py b/src/transformers/models/paligemma/convert_paligemma_weights_to_hf.py index 0d43cac016..bcea5372e5 100644 --- a/src/transformers/models/paligemma/convert_paligemma_weights_to_hf.py +++ b/src/transformers/models/paligemma/convert_paligemma_weights_to_hf.py @@ -12,9 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Convert PaliGemma checkpoints from the original repository. -""" - +"""Convert PaliGemma checkpoints from the original repository.""" import argparse import collections diff --git a/src/transformers/models/paligemma/modeling_paligemma.py b/src/transformers/models/paligemma/modeling_paligemma.py index b2f2904b2f..bc900f0de7 100644 --- a/src/transformers/models/paligemma/modeling_paligemma.py +++ b/src/transformers/models/paligemma/modeling_paligemma.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch PaliGemmamodel.""" +"""PyTorch PaliGemmamodel.""" + from dataclasses import dataclass from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/paligemma/processing_paligemma.py b/src/transformers/models/paligemma/processing_paligemma.py index 258954d856..f086795f43 100644 --- a/src/transformers/models/paligemma/processing_paligemma.py +++ b/src/transformers/models/paligemma/processing_paligemma.py @@ -16,7 +16,6 @@ Processor class for PaliGemma. """ - import logging from typing import List, Optional, Union diff --git a/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py b/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py index c3766c3325..10089a3fef 100644 --- a/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py +++ b/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PatchTSMixer model configuration""" +"""PatchTSMixer model configuration""" from typing import List, Optional, Union diff --git a/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py b/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py index a824faa040..e4c8385697 100644 --- a/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py +++ b/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch PatchTSMixer model.""" +"""PyTorch PatchTSMixer model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/patchtst/modeling_patchtst.py b/src/transformers/models/patchtst/modeling_patchtst.py index 884cd44c83..e30e457283 100755 --- a/src/transformers/models/patchtst/modeling_patchtst.py +++ b/src/transformers/models/patchtst/modeling_patchtst.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch PatchTST model.""" +"""PyTorch PatchTST model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/pegasus/configuration_pegasus.py b/src/transformers/models/pegasus/configuration_pegasus.py index 7dff1a7f85..2cc49857f3 100644 --- a/src/transformers/models/pegasus/configuration_pegasus.py +++ b/src/transformers/models/pegasus/configuration_pegasus.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PEGASUS model configuration""" +"""PEGASUS model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/pegasus/modeling_flax_pegasus.py b/src/transformers/models/pegasus/modeling_flax_pegasus.py index f822af1f22..e50fc1710c 100644 --- a/src/transformers/models/pegasus/modeling_flax_pegasus.py +++ b/src/transformers/models/pegasus/modeling_flax_pegasus.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Flax PEGASUS model.""" - +"""Flax PEGASUS model.""" import math import random diff --git a/src/transformers/models/pegasus/modeling_pegasus.py b/src/transformers/models/pegasus/modeling_pegasus.py index f151ae9940..42cef3a635 100755 --- a/src/transformers/models/pegasus/modeling_pegasus.py +++ b/src/transformers/models/pegasus/modeling_pegasus.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch PEGASUS model.""" +"""PyTorch PEGASUS model.""" import copy import math diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index a3acdc027f..45e9fdbbed 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 Pegasus model.""" - +"""TF 2.0 Pegasus model.""" from __future__ import annotations diff --git a/src/transformers/models/pegasus/tokenization_pegasus_fast.py b/src/transformers/models/pegasus/tokenization_pegasus_fast.py index f1252e959e..11ccb1ff4a 100644 --- a/src/transformers/models/pegasus/tokenization_pegasus_fast.py +++ b/src/transformers/models/pegasus/tokenization_pegasus_fast.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for model PEGASUS.""" - +"""Tokenization class for model PEGASUS.""" import os from shutil import copyfile diff --git a/src/transformers/models/pegasus_x/configuration_pegasus_x.py b/src/transformers/models/pegasus_x/configuration_pegasus_x.py index 166f3b18ab..b84c79656e 100644 --- a/src/transformers/models/pegasus_x/configuration_pegasus_x.py +++ b/src/transformers/models/pegasus_x/configuration_pegasus_x.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PEGASUS-X model configuration""" +"""PEGASUS-X model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/pegasus_x/modeling_pegasus_x.py b/src/transformers/models/pegasus_x/modeling_pegasus_x.py index ba99256a16..6d9072777b 100755 --- a/src/transformers/models/pegasus_x/modeling_pegasus_x.py +++ b/src/transformers/models/pegasus_x/modeling_pegasus_x.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch PEGASUS-X model.""" +"""PyTorch PEGASUS-X model.""" import dataclasses import math diff --git a/src/transformers/models/perceiver/configuration_perceiver.py b/src/transformers/models/perceiver/configuration_perceiver.py index b4b996aef0..e2c9cca4c3 100644 --- a/src/transformers/models/perceiver/configuration_perceiver.py +++ b/src/transformers/models/perceiver/configuration_perceiver.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" Perceiver model configuration""" +"""Perceiver model configuration""" from collections import OrderedDict from typing import Any, Mapping, Optional, Union diff --git a/src/transformers/models/perceiver/convert_perceiver_haiku_to_pytorch.py b/src/transformers/models/perceiver/convert_perceiver_haiku_to_pytorch.py index 1ea9798127..190b4f51f6 100644 --- a/src/transformers/models/perceiver/convert_perceiver_haiku_to_pytorch.py +++ b/src/transformers/models/perceiver/convert_perceiver_haiku_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Perceiver checkpoints originally implemented in Haiku.""" - import argparse import json import pickle diff --git a/src/transformers/models/perceiver/modeling_perceiver.py b/src/transformers/models/perceiver/modeling_perceiver.py index f768df991b..4921e292d6 100755 --- a/src/transformers/models/perceiver/modeling_perceiver.py +++ b/src/transformers/models/perceiver/modeling_perceiver.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Perceiver model.""" +"""PyTorch Perceiver model.""" import abc import math diff --git a/src/transformers/models/perceiver/tokenization_perceiver.py b/src/transformers/models/perceiver/tokenization_perceiver.py index b4ec1e378e..90686b78dc 100644 --- a/src/transformers/models/perceiver/tokenization_perceiver.py +++ b/src/transformers/models/perceiver/tokenization_perceiver.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for Perceiver.""" - +"""Tokenization class for Perceiver.""" from typing import Dict, List, Optional, Tuple diff --git a/src/transformers/models/persimmon/configuration_persimmon.py b/src/transformers/models/persimmon/configuration_persimmon.py index 04bf792964..b8e02256de 100644 --- a/src/transformers/models/persimmon/configuration_persimmon.py +++ b/src/transformers/models/persimmon/configuration_persimmon.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Persimmon model configuration""" +"""Persimmon model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/persimmon/modeling_persimmon.py b/src/transformers/models/persimmon/modeling_persimmon.py index ea2bd074ee..803169ddd5 100644 --- a/src/transformers/models/persimmon/modeling_persimmon.py +++ b/src/transformers/models/persimmon/modeling_persimmon.py @@ -17,7 +17,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch Persimmon model.""" +"""PyTorch Persimmon model.""" + import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/phi/configuration_phi.py b/src/transformers/models/phi/configuration_phi.py index d221255f11..c67f07dcf5 100644 --- a/src/transformers/models/phi/configuration_phi.py +++ b/src/transformers/models/phi/configuration_phi.py @@ -13,8 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" Phi model configuration""" - +"""Phi model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/phi/modeling_phi.py b/src/transformers/models/phi/modeling_phi.py index 1436138f91..d8c1f4a9b4 100644 --- a/src/transformers/models/phi/modeling_phi.py +++ b/src/transformers/models/phi/modeling_phi.py @@ -13,8 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Phi model.""" - +"""PyTorch Phi model.""" import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/phi3/configuration_phi3.py b/src/transformers/models/phi3/configuration_phi3.py index dfa576ad61..0e80566f54 100644 --- a/src/transformers/models/phi3/configuration_phi3.py +++ b/src/transformers/models/phi3/configuration_phi3.py @@ -13,8 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" Phi-3 model configuration""" - +"""Phi-3 model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/phi3/modeling_phi3.py b/src/transformers/models/phi3/modeling_phi3.py index 224aad0085..9ce7e44dce 100644 --- a/src/transformers/models/phi3/modeling_phi3.py +++ b/src/transformers/models/phi3/modeling_phi3.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Phi-3 model.""" +"""PyTorch Phi-3 model.""" import inspect import math diff --git a/src/transformers/models/phobert/tokenization_phobert.py b/src/transformers/models/phobert/tokenization_phobert.py index f312f49501..85450f4d8e 100644 --- a/src/transformers/models/phobert/tokenization_phobert.py +++ b/src/transformers/models/phobert/tokenization_phobert.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization classes for PhoBERT""" - +"""Tokenization classes for PhoBERT""" import os import re diff --git a/src/transformers/models/pix2struct/configuration_pix2struct.py b/src/transformers/models/pix2struct/configuration_pix2struct.py index 2ad2509e44..24cf598cc4 100644 --- a/src/transformers/models/pix2struct/configuration_pix2struct.py +++ b/src/transformers/models/pix2struct/configuration_pix2struct.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Pix2Struct model configuration""" +"""Pix2Struct model configuration""" import os from typing import Union diff --git a/src/transformers/models/pix2struct/image_processing_pix2struct.py b/src/transformers/models/pix2struct/image_processing_pix2struct.py index 945708d69a..466997c8d8 100644 --- a/src/transformers/models/pix2struct/image_processing_pix2struct.py +++ b/src/transformers/models/pix2struct/image_processing_pix2struct.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Image processor class for Pix2Struct.""" + import io import math from typing import Dict, Optional, Union diff --git a/src/transformers/models/pix2struct/modeling_pix2struct.py b/src/transformers/models/pix2struct/modeling_pix2struct.py index 86ccb1dd74..94d882c805 100644 --- a/src/transformers/models/pix2struct/modeling_pix2struct.py +++ b/src/transformers/models/pix2struct/modeling_pix2struct.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Pix2Struct modeling file""" +"""Pix2Struct modeling file""" import math from typing import Dict, List, Optional, Tuple, Union diff --git a/src/transformers/models/plbart/configuration_plbart.py b/src/transformers/models/plbart/configuration_plbart.py index b899847b04..86dbc0cec8 100644 --- a/src/transformers/models/plbart/configuration_plbart.py +++ b/src/transformers/models/plbart/configuration_plbart.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PLBART model configuration""" +"""PLBART model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/plbart/modeling_plbart.py b/src/transformers/models/plbart/modeling_plbart.py index e2b59f980c..93d91e1600 100644 --- a/src/transformers/models/plbart/modeling_plbart.py +++ b/src/transformers/models/plbart/modeling_plbart.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch PLBART model.""" +"""PyTorch PLBART model.""" + import copy import math from typing import Any, Dict, List, Optional, Tuple, Union diff --git a/src/transformers/models/poolformer/configuration_poolformer.py b/src/transformers/models/poolformer/configuration_poolformer.py index 1f297077fe..a7467b380e 100644 --- a/src/transformers/models/poolformer/configuration_poolformer.py +++ b/src/transformers/models/poolformer/configuration_poolformer.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PoolFormer model configuration""" +"""PoolFormer model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/poolformer/modeling_poolformer.py b/src/transformers/models/poolformer/modeling_poolformer.py index ae4e3ea0a7..e70974507b 100755 --- a/src/transformers/models/poolformer/modeling_poolformer.py +++ b/src/transformers/models/poolformer/modeling_poolformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch PoolFormer model.""" - +"""PyTorch PoolFormer model.""" import collections.abc from typing import Optional, Tuple, Union diff --git a/src/transformers/models/pop2piano/configuration_pop2piano.py b/src/transformers/models/pop2piano/configuration_pop2piano.py index 8bb46b008d..51043dab0c 100644 --- a/src/transformers/models/pop2piano/configuration_pop2piano.py +++ b/src/transformers/models/pop2piano/configuration_pop2piano.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Pop2Piano model configuration""" - +"""Pop2Piano model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/pop2piano/convert_pop2piano_weights_to_hf.py b/src/transformers/models/pop2piano/convert_pop2piano_weights_to_hf.py index a73c57886d..54b8bb67e6 100644 --- a/src/transformers/models/pop2piano/convert_pop2piano_weights_to_hf.py +++ b/src/transformers/models/pop2piano/convert_pop2piano_weights_to_hf.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" File for loading the Pop2Piano model weights from the official repository and to show how tokenizer vocab was - constructed""" +"""File for loading the Pop2Piano model weights from the official repository and to show how tokenizer vocab was +constructed""" import json diff --git a/src/transformers/models/pop2piano/feature_extraction_pop2piano.py b/src/transformers/models/pop2piano/feature_extraction_pop2piano.py index 9bf5326c0b..738b932355 100644 --- a/src/transformers/models/pop2piano/feature_extraction_pop2piano.py +++ b/src/transformers/models/pop2piano/feature_extraction_pop2piano.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Feature extractor class for Pop2Piano""" +"""Feature extractor class for Pop2Piano""" import warnings from typing import List, Optional, Union diff --git a/src/transformers/models/pop2piano/modeling_pop2piano.py b/src/transformers/models/pop2piano/modeling_pop2piano.py index e46f2907f4..7b8795e453 100644 --- a/src/transformers/models/pop2piano/modeling_pop2piano.py +++ b/src/transformers/models/pop2piano/modeling_pop2piano.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Pop2Piano model.""" - +"""PyTorch Pop2Piano model.""" import copy import math diff --git a/src/transformers/models/pop2piano/processing_pop2piano.py b/src/transformers/models/pop2piano/processing_pop2piano.py index 639d2e7aea..280e5dc796 100644 --- a/src/transformers/models/pop2piano/processing_pop2piano.py +++ b/src/transformers/models/pop2piano/processing_pop2piano.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Processor class for Pop2Piano.""" +"""Processor class for Pop2Piano.""" import os from typing import List, Optional, Union diff --git a/src/transformers/models/prophetnet/configuration_prophetnet.py b/src/transformers/models/prophetnet/configuration_prophetnet.py index 1b40c9a2c0..7a9da32b3c 100644 --- a/src/transformers/models/prophetnet/configuration_prophetnet.py +++ b/src/transformers/models/prophetnet/configuration_prophetnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ProphetNet model configuration""" +"""ProphetNet model configuration""" from typing import Callable, Optional, Union diff --git a/src/transformers/models/prophetnet/convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/prophetnet/convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py index c9e64c06ef..3039056116 100644 --- a/src/transformers/models/prophetnet/convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/prophetnet/convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert ProphetNet checkpoint.""" - import argparse from torch import nn diff --git a/src/transformers/models/prophetnet/modeling_prophetnet.py b/src/transformers/models/prophetnet/modeling_prophetnet.py index b7eca9c2b3..96fa2e2c12 100644 --- a/src/transformers/models/prophetnet/modeling_prophetnet.py +++ b/src/transformers/models/prophetnet/modeling_prophetnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ProphetNet model, ported from ProphetNet repo(fairsequery_states version).""" +"""PyTorch ProphetNet model, ported from ProphetNet repo(fairsequery_states version).""" import copy import math diff --git a/src/transformers/models/pvt/configuration_pvt.py b/src/transformers/models/pvt/configuration_pvt.py index 82b4822435..25348818f0 100644 --- a/src/transformers/models/pvt/configuration_pvt.py +++ b/src/transformers/models/pvt/configuration_pvt.py @@ -14,7 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Pvt model configuration""" +"""Pvt model configuration""" from collections import OrderedDict from typing import Callable, List, Mapping diff --git a/src/transformers/models/pvt/convert_pvt_to_pytorch.py b/src/transformers/models/pvt/convert_pvt_to_pytorch.py index 187f3200d6..73ae4c1571 100644 --- a/src/transformers/models/pvt/convert_pvt_to_pytorch.py +++ b/src/transformers/models/pvt/convert_pvt_to_pytorch.py @@ -16,7 +16,6 @@ # limitations under the License. """Convert Pvt checkpoints from the original library.""" - import argparse from pathlib import Path diff --git a/src/transformers/models/pvt/modeling_pvt.py b/src/transformers/models/pvt/modeling_pvt.py index f849b42e9c..306cc13122 100755 --- a/src/transformers/models/pvt/modeling_pvt.py +++ b/src/transformers/models/pvt/modeling_pvt.py @@ -14,7 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch PVT model.""" +"""PyTorch PVT model.""" import collections import math diff --git a/src/transformers/models/qdqbert/configuration_qdqbert.py b/src/transformers/models/qdqbert/configuration_qdqbert.py index 40ae3cc310..9f1fdfe31d 100644 --- a/src/transformers/models/qdqbert/configuration_qdqbert.py +++ b/src/transformers/models/qdqbert/configuration_qdqbert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" QDQBERT model configuration""" +"""QDQBERT model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/qdqbert/modeling_qdqbert.py b/src/transformers/models/qdqbert/modeling_qdqbert.py index a10979b916..6078061873 100755 --- a/src/transformers/models/qdqbert/modeling_qdqbert.py +++ b/src/transformers/models/qdqbert/modeling_qdqbert.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch QDQBERT model.""" - +"""PyTorch QDQBERT model.""" import math import os diff --git a/src/transformers/models/qwen2/configuration_qwen2.py b/src/transformers/models/qwen2/configuration_qwen2.py index c2a99dfa8b..1c60d0dba9 100644 --- a/src/transformers/models/qwen2/configuration_qwen2.py +++ b/src/transformers/models/qwen2/configuration_qwen2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Qwen2 model configuration""" +"""Qwen2 model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/qwen2/modeling_qwen2.py b/src/transformers/models/qwen2/modeling_qwen2.py index 98a9e27c9b..c037e28232 100644 --- a/src/transformers/models/qwen2/modeling_qwen2.py +++ b/src/transformers/models/qwen2/modeling_qwen2.py @@ -17,7 +17,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Qwen2 model.""" +"""PyTorch Qwen2 model.""" + import inspect import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/qwen2_moe/configuration_qwen2_moe.py b/src/transformers/models/qwen2_moe/configuration_qwen2_moe.py index 942c0712be..a37f5218c6 100644 --- a/src/transformers/models/qwen2_moe/configuration_qwen2_moe.py +++ b/src/transformers/models/qwen2_moe/configuration_qwen2_moe.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Qwen2MoE model configuration""" +"""Qwen2MoE model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/rag/configuration_rag.py b/src/transformers/models/rag/configuration_rag.py index 2229e485db..5dd4d12c5e 100644 --- a/src/transformers/models/rag/configuration_rag.py +++ b/src/transformers/models/rag/configuration_rag.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" RAG model configuration""" - +"""RAG model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import add_start_docstrings diff --git a/src/transformers/models/rag/modeling_tf_rag.py b/src/transformers/models/rag/modeling_tf_rag.py index 9d8ed65049..d7fb649908 100644 --- a/src/transformers/models/rag/modeling_tf_rag.py +++ b/src/transformers/models/rag/modeling_tf_rag.py @@ -15,7 +15,6 @@ """TFRAG model implementation.""" - from __future__ import annotations import copy diff --git a/src/transformers/models/rag/tokenization_rag.py b/src/transformers/models/rag/tokenization_rag.py index 5b6ec67e6b..5bc87a895d 100644 --- a/src/transformers/models/rag/tokenization_rag.py +++ b/src/transformers/models/rag/tokenization_rag.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for RAG.""" + import os import warnings from typing import List, Optional diff --git a/src/transformers/models/realm/configuration_realm.py b/src/transformers/models/realm/configuration_realm.py index fd21f44a55..7e84f2916d 100644 --- a/src/transformers/models/realm/configuration_realm.py +++ b/src/transformers/models/realm/configuration_realm.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" REALM model configuration.""" +"""REALM model configuration.""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/realm/modeling_realm.py b/src/transformers/models/realm/modeling_realm.py index ff5e43bc13..7c5c344ae5 100644 --- a/src/transformers/models/realm/modeling_realm.py +++ b/src/transformers/models/realm/modeling_realm.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch REALM model.""" +"""PyTorch REALM model.""" import math import os diff --git a/src/transformers/models/recurrent_gemma/configuration_recurrent_gemma.py b/src/transformers/models/recurrent_gemma/configuration_recurrent_gemma.py index f5a3f9673a..7f45a41710 100644 --- a/src/transformers/models/recurrent_gemma/configuration_recurrent_gemma.py +++ b/src/transformers/models/recurrent_gemma/configuration_recurrent_gemma.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" RecurrentGemma model configuration""" +"""RecurrentGemma model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py b/src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py index 06aef9a03d..c48132c83c 100644 --- a/src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +++ b/src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch RecurrentGemma model.""" +"""PyTorch RecurrentGemma model.""" import math from typing import Dict, Optional, Tuple, Union diff --git a/src/transformers/models/reformer/configuration_reformer.py b/src/transformers/models/reformer/configuration_reformer.py index eecd67cc06..018831010b 100755 --- a/src/transformers/models/reformer/configuration_reformer.py +++ b/src/transformers/models/reformer/configuration_reformer.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Reformer model configuration""" +"""Reformer model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py b/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py index f25e166ef9..ad6a077581 100755 --- a/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py +++ b/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Reformer checkpoint.""" - import argparse import pickle diff --git a/src/transformers/models/reformer/tokenization_reformer.py b/src/transformers/models/reformer/tokenization_reformer.py index efc692185b..eb45749336 100644 --- a/src/transformers/models/reformer/tokenization_reformer.py +++ b/src/transformers/models/reformer/tokenization_reformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for model Reformer.""" - +"""Tokenization class for model Reformer.""" import os from shutil import copyfile diff --git a/src/transformers/models/reformer/tokenization_reformer_fast.py b/src/transformers/models/reformer/tokenization_reformer_fast.py index fb0f2c8b8e..26f007a7f7 100644 --- a/src/transformers/models/reformer/tokenization_reformer_fast.py +++ b/src/transformers/models/reformer/tokenization_reformer_fast.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for model Reformer.""" - +"""Tokenization class for model Reformer.""" import os from shutil import copyfile diff --git a/src/transformers/models/regnet/configuration_regnet.py b/src/transformers/models/regnet/configuration_regnet.py index e24bc70a89..34f90ce184 100644 --- a/src/transformers/models/regnet/configuration_regnet.py +++ b/src/transformers/models/regnet/configuration_regnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" RegNet model configuration""" +"""RegNet model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/regnet/convert_regnet_to_pytorch.py b/src/transformers/models/regnet/convert_regnet_to_pytorch.py index d29077c1a7..1f89b7bf8c 100644 --- a/src/transformers/models/regnet/convert_regnet_to_pytorch.py +++ b/src/transformers/models/regnet/convert_regnet_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Convert RegNet checkpoints from timm and vissl.""" - import argparse import json from dataclasses import dataclass, field diff --git a/src/transformers/models/regnet/modeling_regnet.py b/src/transformers/models/regnet/modeling_regnet.py index df8f64a03b..2a348c792a 100644 --- a/src/transformers/models/regnet/modeling_regnet.py +++ b/src/transformers/models/regnet/modeling_regnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch RegNet model.""" +"""PyTorch RegNet model.""" from typing import Optional diff --git a/src/transformers/models/regnet/modeling_tf_regnet.py b/src/transformers/models/regnet/modeling_tf_regnet.py index 24ebb3f5ca..3d6b38b9e4 100644 --- a/src/transformers/models/regnet/modeling_tf_regnet.py +++ b/src/transformers/models/regnet/modeling_tf_regnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TensorFlow RegNet model.""" +"""TensorFlow RegNet model.""" from typing import Optional, Tuple, Union diff --git a/src/transformers/models/rembert/configuration_rembert.py b/src/transformers/models/rembert/configuration_rembert.py index 471f2f7521..f9d28303fd 100644 --- a/src/transformers/models/rembert/configuration_rembert.py +++ b/src/transformers/models/rembert/configuration_rembert.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" RemBERT model configuration""" +"""RemBERT model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py b/src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py index 4c3d53e789..622d507080 100755 --- a/src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert RemBERT checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/rembert/modeling_rembert.py b/src/transformers/models/rembert/modeling_rembert.py index e92418fcff..31f7e3dce4 100755 --- a/src/transformers/models/rembert/modeling_rembert.py +++ b/src/transformers/models/rembert/modeling_rembert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch RemBERT model.""" - +"""PyTorch RemBERT model.""" import math import os diff --git a/src/transformers/models/rembert/modeling_tf_rembert.py b/src/transformers/models/rembert/modeling_tf_rembert.py index daceef1080..5ee9ba1364 100644 --- a/src/transformers/models/rembert/modeling_tf_rembert.py +++ b/src/transformers/models/rembert/modeling_tf_rembert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" TF 2.0 RemBERT model.""" - +"""TF 2.0 RemBERT model.""" from __future__ import annotations diff --git a/src/transformers/models/rembert/tokenization_rembert.py b/src/transformers/models/rembert/tokenization_rembert.py index a2b1f9abc2..0c046b9bca 100644 --- a/src/transformers/models/rembert/tokenization_rembert.py +++ b/src/transformers/models/rembert/tokenization_rembert.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for RemBERT.""" - import os from shutil import copyfile from typing import List, Optional, Tuple diff --git a/src/transformers/models/rembert/tokenization_rembert_fast.py b/src/transformers/models/rembert/tokenization_rembert_fast.py index b7165e362a..350e02e33b 100644 --- a/src/transformers/models/rembert/tokenization_rembert_fast.py +++ b/src/transformers/models/rembert/tokenization_rembert_fast.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization classes for RemBERT model.""" - +"""Tokenization classes for RemBERT model.""" import os from shutil import copyfile diff --git a/src/transformers/models/resnet/configuration_resnet.py b/src/transformers/models/resnet/configuration_resnet.py index 46ccd96cd9..92fe656287 100644 --- a/src/transformers/models/resnet/configuration_resnet.py +++ b/src/transformers/models/resnet/configuration_resnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ResNet model configuration""" +"""ResNet model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/resnet/convert_resnet_to_pytorch.py b/src/transformers/models/resnet/convert_resnet_to_pytorch.py index 52b0bd9068..feceb74d16 100644 --- a/src/transformers/models/resnet/convert_resnet_to_pytorch.py +++ b/src/transformers/models/resnet/convert_resnet_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert ResNet checkpoints from timm.""" - import argparse import json from dataclasses import dataclass, field diff --git a/src/transformers/models/resnet/modeling_resnet.py b/src/transformers/models/resnet/modeling_resnet.py index 06af704603..c7cf0e03c7 100644 --- a/src/transformers/models/resnet/modeling_resnet.py +++ b/src/transformers/models/resnet/modeling_resnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ResNet model.""" +"""PyTorch ResNet model.""" from typing import Optional diff --git a/src/transformers/models/resnet/modeling_tf_resnet.py b/src/transformers/models/resnet/modeling_tf_resnet.py index 4d68775c92..1e2ec143cd 100644 --- a/src/transformers/models/resnet/modeling_tf_resnet.py +++ b/src/transformers/models/resnet/modeling_tf_resnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" TensorFlow ResNet model.""" +"""TensorFlow ResNet model.""" from typing import Optional, Tuple, Union diff --git a/src/transformers/models/roberta/configuration_roberta.py b/src/transformers/models/roberta/configuration_roberta.py index 0ecd57b23a..d08f3df477 100644 --- a/src/transformers/models/roberta/configuration_roberta.py +++ b/src/transformers/models/roberta/configuration_roberta.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" RoBERTa configuration""" +"""RoBERTa configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/roberta/convert_roberta_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/roberta/convert_roberta_original_pytorch_checkpoint_to_pytorch.py index e4d95354ff..c0e6bf94d2 100644 --- a/src/transformers/models/roberta/convert_roberta_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/roberta/convert_roberta_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert RoBERTa checkpoint.""" - import argparse import pathlib diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py index 84448a168d..439d12a870 100644 --- a/src/transformers/models/roberta/modeling_tf_roberta.py +++ b/src/transformers/models/roberta/modeling_tf_roberta.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 RoBERTa model.""" - +"""TF 2.0 RoBERTa model.""" from __future__ import annotations diff --git a/src/transformers/models/roberta/tokenization_roberta_fast.py b/src/transformers/models/roberta/tokenization_roberta_fast.py index 702af8a33e..8384397033 100644 --- a/src/transformers/models/roberta/tokenization_roberta_fast.py +++ b/src/transformers/models/roberta/tokenization_roberta_fast.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Fast Tokenization classes for RoBERTa.""" + import json from typing import List, Optional, Tuple diff --git a/src/transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py b/src/transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py index e7e74b0cdf..e0c939f657 100644 --- a/src/transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py +++ b/src/transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" RoBERTa-PreLayerNorm configuration""" +"""RoBERTa-PreLayerNorm configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/roberta_prelayernorm/convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/roberta_prelayernorm/convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py index 41fd14c5fd..b8491db08b 100644 --- a/src/transformers/models/roberta_prelayernorm/convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/roberta_prelayernorm/convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert RoBERTa-PreLayerNorm checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/roberta_prelayernorm/modeling_flax_roberta_prelayernorm.py b/src/transformers/models/roberta_prelayernorm/modeling_flax_roberta_prelayernorm.py index c13778c1ac..c50227eaa2 100644 --- a/src/transformers/models/roberta_prelayernorm/modeling_flax_roberta_prelayernorm.py +++ b/src/transformers/models/roberta_prelayernorm/modeling_flax_roberta_prelayernorm.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax RoBERTa-PreLayerNorm model.""" +"""Flax RoBERTa-PreLayerNorm model.""" + from typing import Callable, Optional, Tuple import flax.linen as nn diff --git a/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py b/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py index beb9c383e1..1ecd376901 100644 --- a/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py +++ b/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 RoBERTa-PreLayerNorm model.""" - +"""TF 2.0 RoBERTa-PreLayerNorm model.""" from __future__ import annotations diff --git a/src/transformers/models/roc_bert/configuration_roc_bert.py b/src/transformers/models/roc_bert/configuration_roc_bert.py index 752c791cf9..d402349e67 100644 --- a/src/transformers/models/roc_bert/configuration_roc_bert.py +++ b/src/transformers/models/roc_bert/configuration_roc_bert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" RoCBert model configuration""" +"""RoCBert model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/roc_bert/modeling_roc_bert.py b/src/transformers/models/roc_bert/modeling_roc_bert.py index 216aaf78b2..4c63d364ad 100644 --- a/src/transformers/models/roc_bert/modeling_roc_bert.py +++ b/src/transformers/models/roc_bert/modeling_roc_bert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch RoCBert model.""" +"""PyTorch RoCBert model.""" import math import os diff --git a/src/transformers/models/roformer/configuration_roformer.py b/src/transformers/models/roformer/configuration_roformer.py index 0732c3a9e0..ae4ed0fd7b 100644 --- a/src/transformers/models/roformer/configuration_roformer.py +++ b/src/transformers/models/roformer/configuration_roformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" RoFormer model configuration""" +"""RoFormer model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py index 0ab8b671d0..d227948e0e 100755 --- a/src/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert RoFormer checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/roformer/modeling_flax_roformer.py b/src/transformers/models/roformer/modeling_flax_roformer.py index 6e154b311d..f53a056c13 100644 --- a/src/transformers/models/roformer/modeling_flax_roformer.py +++ b/src/transformers/models/roformer/modeling_flax_roformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax RoFormer model.""" +"""Flax RoFormer model.""" from typing import Callable, Optional, Tuple diff --git a/src/transformers/models/roformer/modeling_roformer.py b/src/transformers/models/roformer/modeling_roformer.py index 2911d98d31..69588ff743 100644 --- a/src/transformers/models/roformer/modeling_roformer.py +++ b/src/transformers/models/roformer/modeling_roformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch RoFormer model.""" - +"""PyTorch RoFormer model.""" import math import os diff --git a/src/transformers/models/roformer/modeling_tf_roformer.py b/src/transformers/models/roformer/modeling_tf_roformer.py index e3f84cc78a..20af183691 100644 --- a/src/transformers/models/roformer/modeling_tf_roformer.py +++ b/src/transformers/models/roformer/modeling_tf_roformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 RoFormer model.""" - +"""TF 2.0 RoFormer model.""" from __future__ import annotations diff --git a/src/transformers/models/roformer/tokenization_roformer_fast.py b/src/transformers/models/roformer/tokenization_roformer_fast.py index 1f073c03a5..cc161c1a26 100644 --- a/src/transformers/models/roformer/tokenization_roformer_fast.py +++ b/src/transformers/models/roformer/tokenization_roformer_fast.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Tokenization classes for RoFormer.""" + import json from typing import List, Optional, Tuple diff --git a/src/transformers/models/rwkv/configuration_rwkv.py b/src/transformers/models/rwkv/configuration_rwkv.py index 57b7412333..9539b857ea 100644 --- a/src/transformers/models/rwkv/configuration_rwkv.py +++ b/src/transformers/models/rwkv/configuration_rwkv.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" RWKV configuration""" +"""RWKV configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py b/src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py index b340b9f028..44cf17b1cf 100644 --- a/src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py +++ b/src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert a RWKV checkpoint from BlinkDL to the Hugging Face format.""" - import argparse import gc import json diff --git a/src/transformers/models/sam/configuration_sam.py b/src/transformers/models/sam/configuration_sam.py index 63dc5ff630..b0045655d2 100644 --- a/src/transformers/models/sam/configuration_sam.py +++ b/src/transformers/models/sam/configuration_sam.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" SAM model configuration""" - +"""SAM model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/sam/convert_sam_to_hf.py b/src/transformers/models/sam/convert_sam_to_hf.py index be375494f0..dd8818b68c 100644 --- a/src/transformers/models/sam/convert_sam_to_hf.py +++ b/src/transformers/models/sam/convert_sam_to_hf.py @@ -19,6 +19,7 @@ URL: https://github.com/facebookresearch/segment-anything. Also supports converting the SlimSAM checkpoints from https://github.com/czg1225/SlimSAM/tree/master. """ + import argparse import re diff --git a/src/transformers/models/sam/image_processing_sam.py b/src/transformers/models/sam/image_processing_sam.py index ccdc72fc7b..99315858a3 100644 --- a/src/transformers/models/sam/image_processing_sam.py +++ b/src/transformers/models/sam/image_processing_sam.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Image processor class for SAM.""" + import math from copy import deepcopy from itertools import product diff --git a/src/transformers/models/sam/modeling_sam.py b/src/transformers/models/sam/modeling_sam.py index 0234572611..f5baf5bcf3 100644 --- a/src/transformers/models/sam/modeling_sam.py +++ b/src/transformers/models/sam/modeling_sam.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch SAM model.""" +"""PyTorch SAM model.""" import collections import math diff --git a/src/transformers/models/sam/modeling_tf_sam.py b/src/transformers/models/sam/modeling_tf_sam.py index 5da1293e0f..1e5099f191 100644 --- a/src/transformers/models/sam/modeling_tf_sam.py +++ b/src/transformers/models/sam/modeling_tf_sam.py @@ -17,7 +17,6 @@ TensorFlow SAM model. 
This file was mostly generated by auto-translation from the PyTorch original. In the event of a discrepancy, the original file should be regarded as the 'reference' version. """ - from __future__ import annotations import collections diff --git a/src/transformers/models/sam/processing_sam.py b/src/transformers/models/sam/processing_sam.py index ed89ebeb3a..9e67be1e1e 100644 --- a/src/transformers/models/sam/processing_sam.py +++ b/src/transformers/models/sam/processing_sam.py @@ -15,6 +15,7 @@ """ Processor class for SAM. """ + from copy import deepcopy from typing import Optional, Union diff --git a/src/transformers/models/seamless_m4t/configuration_seamless_m4t.py b/src/transformers/models/seamless_m4t/configuration_seamless_m4t.py index 2075c65fa8..c24eb0ecb6 100644 --- a/src/transformers/models/seamless_m4t/configuration_seamless_m4t.py +++ b/src/transformers/models/seamless_m4t/configuration_seamless_m4t.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" SeamlessM4T model configuration""" +"""SeamlessM4T model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/seamless_m4t/convert_fairseq2_to_hf.py b/src/transformers/models/seamless_m4t/convert_fairseq2_to_hf.py index a90a30f579..b321af02e7 100644 --- a/src/transformers/models/seamless_m4t/convert_fairseq2_to_hf.py +++ b/src/transformers/models/seamless_m4t/convert_fairseq2_to_hf.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Converting Meta SeamlessM4T checkpoints from seamless_communication to HF.""" - +"""Converting Meta SeamlessM4T checkpoints from seamless_communication to HF.""" import argparse import os diff --git a/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py b/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py index b23b30056e..d35629938b 100755 --- a/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py +++ b/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch SeamlessM4T model.""" - +"""PyTorch SeamlessM4T model.""" import copy import math diff --git a/src/transformers/models/seamless_m4t/tokenization_seamless_m4t.py b/src/transformers/models/seamless_m4t/tokenization_seamless_m4t.py index bb6beb760a..230283a0d4 100644 --- a/src/transformers/models/seamless_m4t/tokenization_seamless_m4t.py +++ b/src/transformers/models/seamless_m4t/tokenization_seamless_m4t.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License.
"""Tokenization classes for SeamlessM4T.""" + import os from shutil import copyfile from typing import Any, Dict, List, Optional, Tuple, Union diff --git a/src/transformers/models/seamless_m4t/tokenization_seamless_m4t_fast.py b/src/transformers/models/seamless_m4t/tokenization_seamless_m4t_fast.py index a236db3cb5..70892c9948 100644 --- a/src/transformers/models/seamless_m4t/tokenization_seamless_m4t_fast.py +++ b/src/transformers/models/seamless_m4t/tokenization_seamless_m4t_fast.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Fast Tokenization class for SeamlessM4T.""" + import os from shutil import copyfile from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py b/src/transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py index 051a86cba1..30082cd5fd 100644 --- a/src/transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py +++ b/src/transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" SeamlessM4Tv2 model configuration""" +"""SeamlessM4Tv2 model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/seamless_m4t_v2/convert_fairseq2_to_hf.py b/src/transformers/models/seamless_m4t_v2/convert_fairseq2_to_hf.py index 8d4320cff5..97a633d05a 100644 --- a/src/transformers/models/seamless_m4t_v2/convert_fairseq2_to_hf.py +++ b/src/transformers/models/seamless_m4t_v2/convert_fairseq2_to_hf.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Converting Meta SeamlessM4Tv2 checkpoints from seamless_communication to HF.""" - +"""Converting Meta SeamlessM4Tv2 checkpoints from seamless_communication to HF.""" import argparse import os diff --git a/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py b/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py index 526b697300..a824ff8f24 100644 --- a/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +++ b/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch SeamlessM4Tv2 model.""" - +"""PyTorch SeamlessM4Tv2 model.""" import copy import math diff --git a/src/transformers/models/segformer/configuration_segformer.py b/src/transformers/models/segformer/configuration_segformer.py index 6aadb64b6f..28fc1a7334 100644 --- a/src/transformers/models/segformer/configuration_segformer.py +++ b/src/transformers/models/segformer/configuration_segformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" SegFormer model configuration""" +"""SegFormer model configuration""" import warnings from collections import OrderedDict diff --git a/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py b/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py index 48dd453309..dbac5ab6b8 100644 --- a/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py +++ b/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert SegFormer checkpoints.""" - import argparse import json from collections import OrderedDict diff --git a/src/transformers/models/segformer/modeling_segformer.py b/src/transformers/models/segformer/modeling_segformer.py index d47219a0bd..2a2e544c48 100755 --- a/src/transformers/models/segformer/modeling_segformer.py +++ b/src/transformers/models/segformer/modeling_segformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch SegFormer model.""" - +"""PyTorch SegFormer model.""" import math from typing import Optional, Tuple, Union diff --git a/src/transformers/models/segformer/modeling_tf_segformer.py b/src/transformers/models/segformer/modeling_tf_segformer.py index 0657f1b437..0e5fc6d0be 100644 --- a/src/transformers/models/segformer/modeling_tf_segformer.py +++ b/src/transformers/models/segformer/modeling_tf_segformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TensorFlow SegFormer model.""" - +"""TensorFlow SegFormer model.""" from __future__ import annotations diff --git a/src/transformers/models/seggpt/configuration_seggpt.py b/src/transformers/models/seggpt/configuration_seggpt.py index bac482e97f..f79e7f12b2 100644 --- a/src/transformers/models/seggpt/configuration_seggpt.py +++ b/src/transformers/models/seggpt/configuration_seggpt.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" SegGpt model configuration""" - +"""SegGpt model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/seggpt/convert_seggpt_to_hf.py b/src/transformers/models/seggpt/convert_seggpt_to_hf.py index a13372dfbb..d67daeab93 100644 --- a/src/transformers/models/seggpt/convert_seggpt_to_hf.py +++ b/src/transformers/models/seggpt/convert_seggpt_to_hf.py @@ -17,7 +17,6 @@ URL: https://github.com/baaivision/Painter/tree/main/SegGPT """ - import argparse import requests diff --git a/src/transformers/models/seggpt/modeling_seggpt.py b/src/transformers/models/seggpt/modeling_seggpt.py index 65672e4213..b84fd8c9d2 100644 --- a/src/transformers/models/seggpt/modeling_seggpt.py +++ b/src/transformers/models/seggpt/modeling_seggpt.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch SegGpt model.""" - +"""PyTorch SegGpt model.""" import collections.abc import math diff --git a/src/transformers/models/sew/configuration_sew.py b/src/transformers/models/sew/configuration_sew.py index 33ea6d374f..6c877277ae 100644 --- a/src/transformers/models/sew/configuration_sew.py +++ b/src/transformers/models/sew/configuration_sew.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" SEW model configuration""" +"""SEW model configuration""" import functools import operator diff --git a/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py index 81c3284af8..df0cae2a3b 100644 --- a/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert SEW checkpoint.""" - import argparse import json import os diff --git a/src/transformers/models/sew/modeling_sew.py b/src/transformers/models/sew/modeling_sew.py index 199d0e543a..ea10b25818 100644 --- a/src/transformers/models/sew/modeling_sew.py +++ b/src/transformers/models/sew/modeling_sew.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch SEW model.""" +"""PyTorch SEW model.""" import math import warnings diff --git a/src/transformers/models/sew_d/configuration_sew_d.py b/src/transformers/models/sew_d/configuration_sew_d.py index 7a38815b47..ea791935ba 100644 --- a/src/transformers/models/sew_d/configuration_sew_d.py +++ b/src/transformers/models/sew_d/configuration_sew_d.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" SEW-D model configuration""" +"""SEW-D model configuration""" import functools import operator diff --git a/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py index 7844d7912f..1540efa4be 100644 --- a/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert SEW checkpoint.""" - import argparse import json import os diff --git a/src/transformers/models/sew_d/modeling_sew_d.py b/src/transformers/models/sew_d/modeling_sew_d.py index 352a86c9c2..2daa6739ef 100644 --- a/src/transformers/models/sew_d/modeling_sew_d.py +++ b/src/transformers/models/sew_d/modeling_sew_d.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch SEW model.""" +"""PyTorch SEW model.""" import math import warnings diff --git a/src/transformers/models/siglip/configuration_siglip.py b/src/transformers/models/siglip/configuration_siglip.py index 7692f79abb..ca26919b7e 100644 --- a/src/transformers/models/siglip/configuration_siglip.py +++ b/src/transformers/models/siglip/configuration_siglip.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Siglip model configuration""" +"""Siglip model configuration""" import os from typing import Union diff --git a/src/transformers/models/siglip/convert_siglip_to_hf.py b/src/transformers/models/siglip/convert_siglip_to_hf.py index 6adacef84f..163f6f2797 100644 --- a/src/transformers/models/siglip/convert_siglip_to_hf.py +++ b/src/transformers/models/siglip/convert_siglip_to_hf.py @@ -17,7 +17,6 @@ URL: https://github.com/google-research/big_vision/tree/main """ - import argparse import collections from pathlib import Path diff --git a/src/transformers/models/siglip/modeling_siglip.py b/src/transformers/models/siglip/modeling_siglip.py index 87fae3493b..d6607f9ef7 100644 --- a/src/transformers/models/siglip/modeling_siglip.py +++ b/src/transformers/models/siglip/modeling_siglip.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Siglip model.""" - +"""PyTorch Siglip model.""" import math import warnings diff --git a/src/transformers/models/siglip/tokenization_siglip.py b/src/transformers/models/siglip/tokenization_siglip.py index 41277320a3..6203c68870 100644 --- a/src/transformers/models/siglip/tokenization_siglip.py +++ b/src/transformers/models/siglip/tokenization_siglip.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for SigLIP model.""" +"""Tokenization class for SigLIP model.""" import os import re diff --git a/src/transformers/models/speech_encoder_decoder/convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py b/src/transformers/models/speech_encoder_decoder/convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py index 89690a5729..874aa2e066 100644 --- a/src/transformers/models/speech_encoder_decoder/convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py +++ b/src/transformers/models/speech_encoder_decoder/convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Wav2Vec2 checkpoint.""" - import argparse import fairseq diff --git a/src/transformers/models/speech_encoder_decoder/convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py b/src/transformers/models/speech_encoder_decoder/convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py index 5e726aa9fd..3772889820 100644 --- a/src/transformers/models/speech_encoder_decoder/convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py +++ b/src/transformers/models/speech_encoder_decoder/convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Convert Wav2Vec2 checkpoint.""" - import argparse import json import os diff --git a/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py index e3bbd86266..2a15714cff 100644 --- a/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py +++ b/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Classes to support Flax Speech-Encoder-Decoder architectures""" +"""Classes to support Flax Speech-Encoder-Decoder architectures""" import os from typing import Optional, Tuple, Union diff --git a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py index 77b69afe8f..d164622cd6 100644 --- a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +++ b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Classes to support Speech-Encoder-Text-Decoder architectures""" - +"""Classes to support Speech-Encoder-Text-Decoder architectures""" from typing import Optional, Tuple, Union diff --git a/src/transformers/models/speech_to_text/configuration_speech_to_text.py b/src/transformers/models/speech_to_text/configuration_speech_to_text.py index 2b8e3bd22e..80602e9a7d 100644 --- a/src/transformers/models/speech_to_text/configuration_speech_to_text.py +++ b/src/transformers/models/speech_to_text/configuration_speech_to_text.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Speech2Text model configuration""" +"""Speech2Text model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/speech_to_text/modeling_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_speech_to_text.py index 155d6a5dfe..8353a172b2 100755 --- a/src/transformers/models/speech_to_text/modeling_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_speech_to_text.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Speech2Text model.""" +"""PyTorch Speech2Text model.""" import math from typing import Optional, Tuple, Union diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py index 91e6028332..bac1256ca4 100755 --- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" TensorFlow Speech2Text model.""" - +"""TensorFlow Speech2Text model.""" from __future__ import annotations diff --git a/src/transformers/models/speech_to_text/processing_speech_to_text.py b/src/transformers/models/speech_to_text/processing_speech_to_text.py index 42e9006338..646b389994 100644 --- a/src/transformers/models/speech_to_text/processing_speech_to_text.py +++ b/src/transformers/models/speech_to_text/processing_speech_to_text.py @@ -15,6 +15,7 @@ """ Speech processor class for Speech2Text """ + import warnings from contextlib import contextmanager diff --git a/src/transformers/models/speech_to_text/tokenization_speech_to_text.py b/src/transformers/models/speech_to_text/tokenization_speech_to_text.py index 27db0a671e..1b9841f0cf 100644 --- a/src/transformers/models/speech_to_text/tokenization_speech_to_text.py +++ b/src/transformers/models/speech_to_text/tokenization_speech_to_text.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for Speech2Text.""" + import json import os from pathlib import Path diff --git a/src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py b/src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py index 5c9ebbe00f..bcc92a7bd2 100644 --- a/src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py +++ b/src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Speech2Text model configuration""" +"""Speech2Text model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py b/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py index 20f8555bd9..35305408e6 100755 --- a/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py +++ b/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Speech2Text2 model.""" - +"""PyTorch Speech2Text2 model.""" import copy import math diff --git a/src/transformers/models/speech_to_text_2/processing_speech_to_text_2.py b/src/transformers/models/speech_to_text_2/processing_speech_to_text_2.py index 47a45d700f..9e0881d89d 100644 --- a/src/transformers/models/speech_to_text_2/processing_speech_to_text_2.py +++ b/src/transformers/models/speech_to_text_2/processing_speech_to_text_2.py @@ -15,6 +15,7 @@ """ Speech processor class for Speech2Text2 """ + import warnings from contextlib import contextmanager diff --git a/src/transformers/models/speecht5/configuration_speecht5.py b/src/transformers/models/speecht5/configuration_speecht5.py index 9188325303..d8f4497de8 100644 --- a/src/transformers/models/speecht5/configuration_speecht5.py +++ b/src/transformers/models/speecht5/configuration_speecht5.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" SpeechT5 model configuration""" +"""SpeechT5 model configuration""" import functools import operator diff --git a/src/transformers/models/speecht5/modeling_speecht5.py b/src/transformers/models/speecht5/modeling_speecht5.py index 175b4b6e79..22ee648b97 100644 --- a/src/transformers/models/speecht5/modeling_speecht5.py +++ b/src/transformers/models/speecht5/modeling_speecht5.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch SpeechT5 model.""" +"""PyTorch SpeechT5 model.""" import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/speecht5/tokenization_speecht5.py b/src/transformers/models/speecht5/tokenization_speecht5.py index 41cb296f8f..97b2feaab3 100644 --- a/src/transformers/models/speecht5/tokenization_speecht5.py +++ b/src/transformers/models/speecht5/tokenization_speecht5.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization class for SpeechT5.""" - import os from shutil import copyfile from typing import Any, Dict, List, Optional, Tuple diff --git a/src/transformers/models/splinter/configuration_splinter.py b/src/transformers/models/splinter/configuration_splinter.py index 83e78e4e4a..9a946fd4be 100644 --- a/src/transformers/models/splinter/configuration_splinter.py +++ b/src/transformers/models/splinter/configuration_splinter.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Splinter model configuration""" +"""Splinter model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/splinter/modeling_splinter.py b/src/transformers/models/splinter/modeling_splinter.py index f3f470258d..6494a57fa4 100755 --- a/src/transformers/models/splinter/modeling_splinter.py +++ b/src/transformers/models/splinter/modeling_splinter.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Splinter model.""" - +"""PyTorch Splinter model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/squeezebert/configuration_squeezebert.py b/src/transformers/models/squeezebert/configuration_squeezebert.py index ec79988849..1f3753ac5c 100644 --- a/src/transformers/models/squeezebert/configuration_squeezebert.py +++ b/src/transformers/models/squeezebert/configuration_squeezebert.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" SqueezeBERT model configuration""" +"""SqueezeBERT model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/squeezebert/modeling_squeezebert.py b/src/transformers/models/squeezebert/modeling_squeezebert.py index cd5dcb0842..483bac01bd 100644 --- a/src/transformers/models/squeezebert/modeling_squeezebert.py +++ b/src/transformers/models/squeezebert/modeling_squeezebert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch SqueezeBert model.""" - +"""PyTorch SqueezeBert model.""" import math from typing import Optional, Tuple, Union diff --git a/src/transformers/models/stablelm/configuration_stablelm.py b/src/transformers/models/stablelm/configuration_stablelm.py index 64b39fe20e..da7086ba74 100644 --- a/src/transformers/models/stablelm/configuration_stablelm.py +++ b/src/transformers/models/stablelm/configuration_stablelm.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" StableLM model configuration """ +"""StableLM model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/stablelm/modeling_stablelm.py b/src/transformers/models/stablelm/modeling_stablelm.py index 5b81abc693..df4e922c5a 100755 --- a/src/transformers/models/stablelm/modeling_stablelm.py +++ b/src/transformers/models/stablelm/modeling_stablelm.py @@ -17,7 +17,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch StableLM model.""" +"""PyTorch StableLM model.""" + import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/starcoder2/configuration_starcoder2.py b/src/transformers/models/starcoder2/configuration_starcoder2.py index 3bb0d1b655..91f9e94983 100644 --- a/src/transformers/models/starcoder2/configuration_starcoder2.py +++ b/src/transformers/models/starcoder2/configuration_starcoder2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Starcoder2 model configuration""" +"""Starcoder2 model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/starcoder2/modeling_starcoder2.py b/src/transformers/models/starcoder2/modeling_starcoder2.py index e0abef4b41..32e5998884 100644 --- a/src/transformers/models/starcoder2/modeling_starcoder2.py +++ b/src/transformers/models/starcoder2/modeling_starcoder2.py @@ -17,7 +17,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Starcoder2 model.""" +"""PyTorch Starcoder2 model.""" + import inspect import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/superpoint/modeling_superpoint.py b/src/transformers/models/superpoint/modeling_superpoint.py index 87473480d7..cfd3dfd86e 100644 --- a/src/transformers/models/superpoint/modeling_superpoint.py +++ b/src/transformers/models/superpoint/modeling_superpoint.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""PyTorch SuperPoint model.""" + from dataclasses import dataclass from typing import Optional, Tuple, Union diff --git a/src/transformers/models/swiftformer/configuration_swiftformer.py b/src/transformers/models/swiftformer/configuration_swiftformer.py index be95094bac..abfdf51652 100644 --- a/src/transformers/models/swiftformer/configuration_swiftformer.py +++ b/src/transformers/models/swiftformer/configuration_swiftformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" SwiftFormer model configuration""" +"""SwiftFormer model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py b/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py index f6cabb34b6..21ecebebe2 100644 --- a/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py +++ b/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert SwiftFormer checkpoints from the original implementation.""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/swiftformer/modeling_swiftformer.py b/src/transformers/models/swiftformer/modeling_swiftformer.py index 68a0ee53b0..bd86c3d717 100644 --- a/src/transformers/models/swiftformer/modeling_swiftformer.py +++ b/src/transformers/models/swiftformer/modeling_swiftformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch SwiftFormer model.""" - +"""PyTorch SwiftFormer model.""" import collections.abc from typing import Optional, Tuple, Union diff --git a/src/transformers/models/swiftformer/modeling_tf_swiftformer.py b/src/transformers/models/swiftformer/modeling_tf_swiftformer.py index 271bd5e280..3f1d19e9e3 100644 --- a/src/transformers/models/swiftformer/modeling_tf_swiftformer.py +++ b/src/transformers/models/swiftformer/modeling_tf_swiftformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TensorFlow SwiftFormer model.""" - +"""TensorFlow SwiftFormer model.""" import collections.abc from typing import Optional, Tuple, Union diff --git a/src/transformers/models/swin/configuration_swin.py b/src/transformers/models/swin/configuration_swin.py index 281d0f047b..321648f149 100644 --- a/src/transformers/models/swin/configuration_swin.py +++ b/src/transformers/models/swin/configuration_swin.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Swin Transformer model configuration""" +"""Swin Transformer model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py b/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py index 156b0ba86c..6402346289 100644 --- a/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py +++ b/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py @@ -95,15 +95,15 @@ def convert_state_dict(orig_state_dict, model): dim = model.swin.encoder.layers[layer_num].blocks[block_num].attention.self.all_head_size if "weight" in key: - orig_state_dict[ - f"swin.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.weight" - ] = val[:dim, :] + orig_state_dict[f"swin.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.weight"] = ( + val[:dim, :] + ) orig_state_dict[f"swin.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.weight"] = val[ dim : dim * 2, : ] - orig_state_dict[ - f"swin.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.weight" - ] = val[-dim:, :] + orig_state_dict[f"swin.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.weight"] = ( + val[-dim:, :] + ) else: orig_state_dict[f"swin.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.bias"] = val[ :dim diff --git a/src/transformers/models/swin/convert_swin_timm_to_pytorch.py b/src/transformers/models/swin/convert_swin_timm_to_pytorch.py index 828237490e..c91249b272 100644 --- a/src/transformers/models/swin/convert_swin_timm_to_pytorch.py +++ b/src/transformers/models/swin/convert_swin_timm_to_pytorch.py @@ -102,15 +102,15 @@ def convert_state_dict(orig_state_dict, model): dim = model.swin.encoder.layers[layer_num].blocks[block_num].attention.self.all_head_size if "weight" in key: - orig_state_dict[ - f"swin.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.weight" - ] = val[:dim, :] + orig_state_dict[f"swin.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.weight"] = ( + val[:dim, :] + ) orig_state_dict[f"swin.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.weight"] = val[ dim : dim * 2, : ] - orig_state_dict[ - f"swin.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.weight" - ] = val[-dim:, :] + orig_state_dict[f"swin.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.weight"] = ( + val[-dim:, :] + ) else: orig_state_dict[f"swin.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.bias"] = val[ :dim diff --git a/src/transformers/models/swin/modeling_swin.py b/src/transformers/models/swin/modeling_swin.py index 9f64f8009a..13c20f59e9 100644 --- a/src/transformers/models/swin/modeling_swin.py +++ b/src/transformers/models/swin/modeling_swin.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Swin Transformer model.""" - +"""PyTorch Swin Transformer model.""" import collections.abc import math diff --git a/src/transformers/models/swin/modeling_tf_swin.py b/src/transformers/models/swin/modeling_tf_swin.py index 99da3d7f1e..035b31e8d4 100644 --- a/src/transformers/models/swin/modeling_tf_swin.py +++ b/src/transformers/models/swin/modeling_tf_swin.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 Swin Transformer model.""" - +"""TF 2.0 Swin Transformer model.""" from __future__ import annotations diff --git a/src/transformers/models/swin2sr/configuration_swin2sr.py b/src/transformers/models/swin2sr/configuration_swin2sr.py index 98177a804a..0d910e89e4 100644 --- a/src/transformers/models/swin2sr/configuration_swin2sr.py +++ b/src/transformers/models/swin2sr/configuration_swin2sr.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Swin2SR Transformer model configuration""" +"""Swin2SR Transformer model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py b/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py index 6884bf0afc..f053128339 100644 --- a/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py +++ b/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py @@ -137,22 +137,22 @@ def convert_state_dict(orig_state_dict, config): orig_state_dict[ f"swin2sr.encoder.stages.{stage_num}.layers.{block_num}.attention.self.query.weight" ] = val[:dim, :] - orig_state_dict[ - f"swin2sr.encoder.stages.{stage_num}.layers.{block_num}.attention.self.key.weight" - ] = val[dim : dim * 2, :] + orig_state_dict[f"swin2sr.encoder.stages.{stage_num}.layers.{block_num}.attention.self.key.weight"] = ( + val[dim : dim * 2, :] + ) orig_state_dict[ f"swin2sr.encoder.stages.{stage_num}.layers.{block_num}.attention.self.value.weight" ] = val[-dim:, :] else: - orig_state_dict[ - f"swin2sr.encoder.stages.{stage_num}.layers.{block_num}.attention.self.query.bias" - ] = val[:dim] - orig_state_dict[ - f"swin2sr.encoder.stages.{stage_num}.layers.{block_num}.attention.self.key.bias" - ] = val[dim : dim * 2] - orig_state_dict[ - f"swin2sr.encoder.stages.{stage_num}.layers.{block_num}.attention.self.value.bias" - ] = val[-dim:] + orig_state_dict[f"swin2sr.encoder.stages.{stage_num}.layers.{block_num}.attention.self.query.bias"] = ( + val[:dim] + ) + orig_state_dict[f"swin2sr.encoder.stages.{stage_num}.layers.{block_num}.attention.self.key.bias"] = ( + val[dim : dim * 2] + ) + orig_state_dict[f"swin2sr.encoder.stages.{stage_num}.layers.{block_num}.attention.self.value.bias"] = ( + val[-dim:] + ) pass else: orig_state_dict[rename_key(key, config)] = val diff --git a/src/transformers/models/swin2sr/modeling_swin2sr.py b/src/transformers/models/swin2sr/modeling_swin2sr.py index 96b67da51e..64fad4d9b2 100644 --- a/src/transformers/models/swin2sr/modeling_swin2sr.py +++ b/src/transformers/models/swin2sr/modeling_swin2sr.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch Swin2SR Transformer model.""" - +"""PyTorch Swin2SR Transformer model.""" import collections.abc import math diff --git a/src/transformers/models/swinv2/configuration_swinv2.py b/src/transformers/models/swinv2/configuration_swinv2.py index 17e924804c..c6032c45df 100644 --- a/src/transformers/models/swinv2/configuration_swinv2.py +++ b/src/transformers/models/swinv2/configuration_swinv2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Swinv2 Transformer model configuration""" +"""Swinv2 Transformer model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/swinv2/convert_swinv2_timm_to_pytorch.py b/src/transformers/models/swinv2/convert_swinv2_timm_to_pytorch.py index 21deda864c..0e6e837a7e 100644 --- a/src/transformers/models/swinv2/convert_swinv2_timm_to_pytorch.py +++ b/src/transformers/models/swinv2/convert_swinv2_timm_to_pytorch.py @@ -145,22 +145,22 @@ def convert_state_dict(orig_state_dict, model): orig_state_dict[ f"swinv2.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.weight" ] = val[:dim, :] - orig_state_dict[ - f"swinv2.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.weight" - ] = val[dim : dim * 2, :] + orig_state_dict[f"swinv2.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.weight"] = ( + val[dim : dim * 2, :] + ) orig_state_dict[ f"swinv2.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.weight" ] = val[-dim:, :] else: - orig_state_dict[ - f"swinv2.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.bias" - ] = val[:dim] + orig_state_dict[f"swinv2.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.bias"] = ( + val[:dim] + ) orig_state_dict[f"swinv2.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.bias"] = val[ dim : dim * 2 ] - orig_state_dict[ - f"swinv2.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.bias" - ] = val[-dim:] + orig_state_dict[f"swinv2.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.bias"] = ( + val[-dim:] + ) else: orig_state_dict[rename_key(key)] = val diff --git a/src/transformers/models/swinv2/modeling_swinv2.py b/src/transformers/models/swinv2/modeling_swinv2.py index 48a4c65722..b0682eacaa 100644 --- a/src/transformers/models/swinv2/modeling_swinv2.py +++ b/src/transformers/models/swinv2/modeling_swinv2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Swinv2 Transformer model.""" - +"""PyTorch Swinv2 Transformer model.""" import collections.abc import math diff --git a/src/transformers/models/switch_transformers/configuration_switch_transformers.py b/src/transformers/models/switch_transformers/configuration_switch_transformers.py index fe96297777..5ed95f2b61 100644 --- a/src/transformers/models/switch_transformers/configuration_switch_transformers.py +++ b/src/transformers/models/switch_transformers/configuration_switch_transformers.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Switch Transformers model configuration""" +"""Switch Transformers model configuration""" + from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/switch_transformers/modeling_switch_transformers.py b/src/transformers/models/switch_transformers/modeling_switch_transformers.py index 395d055177..3701b30a22 100644 --- a/src/transformers/models/switch_transformers/modeling_switch_transformers.py +++ b/src/transformers/models/switch_transformers/modeling_switch_transformers.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch SwitchTransformers model.""" - +"""PyTorch SwitchTransformers model.""" import copy import math diff --git a/src/transformers/models/t5/configuration_t5.py b/src/transformers/models/t5/configuration_t5.py index eec47e5eb2..e5f2615611 100644 --- a/src/transformers/models/t5/configuration_t5.py +++ b/src/transformers/models/t5/configuration_t5.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" T5 model configuration""" +"""T5 model configuration""" + from typing import Mapping from ...configuration_utils import PretrainedConfig diff --git a/src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py index 7d9a20f3b0..9b1b15857c 100755 --- a/src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Convert T5 checkpoint.""" - import argparse from transformers import T5Config, T5ForConditionalGeneration, load_tf_weights_in_t5 diff --git a/src/transformers/models/t5/convert_t5x_checkpoint_to_flax.py b/src/transformers/models/t5/convert_t5x_checkpoint_to_flax.py index 11f32c8461..91ac9f08a0 100644 --- a/src/transformers/models/t5/convert_t5x_checkpoint_to_flax.py +++ b/src/transformers/models/t5/convert_t5x_checkpoint_to_flax.py @@ -54,22 +54,22 @@ def convert_t5x_checkpoint_to_flax(t5x_checkpoint_path, config_name, flax_dump_f t5x_mlp_layer_norm = t5x_model["target"]["encoder"][layer_name]["pre_mlp_layer_norm"]["scale"] # Assigning - flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["k"][ - "kernel" - ] = t5x_attention_key - flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["o"][ - "kernel" - ] = t5x_attention_out - flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["q"][ - "kernel" - ] = t5x_attention_query - flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["v"][ - "kernel" - ] = t5x_attention_value + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["k"]["kernel"] = ( + t5x_attention_key + ) + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["o"]["kernel"] = ( + t5x_attention_out + ) + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["q"]["kernel"] = ( + t5x_attention_query + ) + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["v"]["kernel"] = ( + t5x_attention_value + ) - flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["layer_norm"][ - "weight" - ] = t5x_attention_layer_norm + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["layer_norm"]["weight"] = ( + t5x_attention_layer_norm + ) if split_mlp_wi: flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["1"]["DenseReluDense"]["wi_0"][ @@ -79,16 +79,16 @@ def convert_t5x_checkpoint_to_flax(t5x_checkpoint_path, config_name, flax_dump_f "kernel" ] = t5x_mlp_wi_1 else: - flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["1"]["DenseReluDense"]["wi"][ - "kernel" - ] = t5x_mlp_wi + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["1"]["DenseReluDense"]["wi"]["kernel"] = ( + t5x_mlp_wi + ) - flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["1"]["DenseReluDense"]["wo"][ - "kernel" - ] = t5x_mlp_wo - flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["1"]["layer_norm"][ - "weight" - ] = t5x_mlp_layer_norm + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["1"]["DenseReluDense"]["wo"]["kernel"] = ( + t5x_mlp_wo + ) + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["1"]["layer_norm"]["weight"] = ( + t5x_mlp_layer_norm + ) # Only for layer 0: t5x_encoder_rel_embedding = t5x_model["target"]["encoder"]["relpos_bias"]["rel_embedding"].T @@ -145,39 +145,39 @@ def convert_t5x_checkpoint_to_flax(t5x_checkpoint_path, config_name, flax_dump_f tx5_mlp_layer_norm = t5x_model["target"]["decoder"][layer_name]["pre_mlp_layer_norm"]["scale"] # Assigning - flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["k"][ - "kernel" - ] = t5x_attention_key - flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["o"][ - "kernel" - ] = t5x_attention_out - 
flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["q"][ - "kernel" - ] = t5x_attention_query - flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["v"][ - "kernel" - ] = t5x_attention_value + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["k"]["kernel"] = ( + t5x_attention_key + ) + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["o"]["kernel"] = ( + t5x_attention_out + ) + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["q"]["kernel"] = ( + t5x_attention_query + ) + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["v"]["kernel"] = ( + t5x_attention_value + ) - flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["layer_norm"][ - "weight" - ] = t5x_pre_attention_layer_norm + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["layer_norm"]["weight"] = ( + t5x_pre_attention_layer_norm + ) - flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["k"][ - "kernel" - ] = t5x_enc_dec_attention_key - flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["o"][ - "kernel" - ] = t5x_enc_dec_attention_out - flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["q"][ - "kernel" - ] = t5x_enc_dec_attention_query - flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["v"][ - "kernel" - ] = t5x_enc_dec_attention_value + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["k"]["kernel"] = ( + t5x_enc_dec_attention_key + ) + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["o"]["kernel"] = ( + t5x_enc_dec_attention_out + ) + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["q"]["kernel"] = ( + t5x_enc_dec_attention_query + ) + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["v"]["kernel"] = ( + t5x_enc_dec_attention_value + ) - flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["layer_norm"][ - "weight" - ] = t5x_cross_layer_norm + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["layer_norm"]["weight"] = ( + t5x_cross_layer_norm + ) if split_mlp_wi: flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["2"]["DenseReluDense"]["wi_0"][ @@ -187,17 +187,17 @@ def convert_t5x_checkpoint_to_flax(t5x_checkpoint_path, config_name, flax_dump_f "kernel" ] = t5x_mlp_wi_1 else: - flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["2"]["DenseReluDense"]["wi"][ - "kernel" - ] = t5x_mlp_wi + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["2"]["DenseReluDense"]["wi"]["kernel"] = ( + t5x_mlp_wi + ) - flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["2"]["DenseReluDense"]["wo"][ - "kernel" - ] = t5x_mlp_wo + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["2"]["DenseReluDense"]["wo"]["kernel"] = ( + t5x_mlp_wo + ) - flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["2"]["layer_norm"][ - "weight" - ] = tx5_mlp_layer_norm + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["2"]["layer_norm"]["weight"] = ( + tx5_mlp_layer_norm + ) # Decoder Normalization tx5_decoder_norm = t5x_model["target"]["decoder"]["decoder_norm"]["scale"] diff 
--git a/src/transformers/models/t5/modeling_flax_t5.py b/src/transformers/models/t5/modeling_flax_t5.py index 94b24bd42f..be5ffd4489 100644 --- a/src/transformers/models/t5/modeling_flax_t5.py +++ b/src/transformers/models/t5/modeling_flax_t5.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax T5 model.""" - +"""Flax T5 model.""" import copy from typing import Callable, Optional, Tuple diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py index 81dff54f99..224769fdfe 100644 --- a/src/transformers/models/t5/modeling_t5.py +++ b/src/transformers/models/t5/modeling_t5.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch T5 model.""" - +"""PyTorch T5 model.""" import copy import math diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index d7a9c0a17d..6cd44766bf 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 T5 model.""" - +"""TF 2.0 T5 model.""" from __future__ import annotations diff --git a/src/transformers/models/t5/tokenization_t5.py b/src/transformers/models/t5/tokenization_t5.py index 7292808adc..0f2ae101c8 100644 --- a/src/transformers/models/t5/tokenization_t5.py +++ b/src/transformers/models/t5/tokenization_t5.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for model T5.""" - +"""Tokenization class for model T5.""" import os import re diff --git a/src/transformers/models/t5/tokenization_t5_fast.py b/src/transformers/models/t5/tokenization_t5_fast.py index e9f2033812..0a92803f16 100644 --- a/src/transformers/models/t5/tokenization_t5_fast.py +++ b/src/transformers/models/t5/tokenization_t5_fast.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for model T5.""" - +"""Tokenization class for model T5.""" import os import re diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index 8553ad1a65..f6647c5a4e 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Table Transformer model configuration""" +"""Table Transformer model configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/table_transformer/convert_table_transformer_to_hf.py b/src/transformers/models/table_transformer/convert_table_transformer_to_hf.py index d06c3eb26b..487cdc4819 100644 --- a/src/transformers/models/table_transformer/convert_table_transformer_to_hf.py +++ b/src/transformers/models/table_transformer/convert_table_transformer_to_hf.py @@ -17,7 +17,6 @@ URL: https://github.com/microsoft/table-transformer """ - import argparse from collections import OrderedDict from pathlib import Path diff --git a/src/transformers/models/table_transformer/convert_table_transformer_to_hf_no_timm.py b/src/transformers/models/table_transformer/convert_table_transformer_to_hf_no_timm.py index 0a2b7b87fe..1073d48877 100644 --- a/src/transformers/models/table_transformer/convert_table_transformer_to_hf_no_timm.py +++ b/src/transformers/models/table_transformer/convert_table_transformer_to_hf_no_timm.py @@ -17,7 +17,6 @@ URL: https://github.com/microsoft/table-transformer """ - import argparse from pathlib import Path diff --git a/src/transformers/models/table_transformer/modeling_table_transformer.py b/src/transformers/models/table_transformer/modeling_table_transformer.py index 3bfdde8907..a8c2593752 100644 --- a/src/transformers/models/table_transformer/modeling_table_transformer.py +++ b/src/transformers/models/table_transformer/modeling_table_transformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Table Transformer model.""" - +"""PyTorch Table Transformer model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/tapas/configuration_tapas.py b/src/transformers/models/tapas/configuration_tapas.py index cbc5cebf4e..63d289e38f 100644 --- a/src/transformers/models/tapas/configuration_tapas.py +++ b/src/transformers/models/tapas/configuration_tapas.py @@ -22,7 +22,6 @@ Hyperparameters are taken from run_task_main.py and hparam_utils.py of the origi """ - from ...configuration_utils import PretrainedConfig diff --git a/src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py index 2772a7f126..34bf77cccd 100644 --- a/src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert TAPAS checkpoint.""" - import argparse from transformers import ( diff --git a/src/transformers/models/tapas/modeling_tapas.py b/src/transformers/models/tapas/modeling_tapas.py index 903fa2a225..a06770778e 100644 --- a/src/transformers/models/tapas/modeling_tapas.py +++ b/src/transformers/models/tapas/modeling_tapas.py @@ -14,7 +14,6 @@ # limitations under the License. """PyTorch TAPAS model.""" - import enum import math import os diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py index b26803cecb..3515dfe655 100644 --- a/src/transformers/models/tapas/modeling_tf_tapas.py +++ b/src/transformers/models/tapas/modeling_tf_tapas.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""TF 2.0 TAPAS model.""" - from __future__ import annotations import enum diff --git a/src/transformers/models/tapas/tokenization_tapas.py b/src/transformers/models/tapas/tokenization_tapas.py index 23fbd5300e..529ecb444e 100644 --- a/src/transformers/models/tapas/tokenization_tapas.py +++ b/src/transformers/models/tapas/tokenization_tapas.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization class for TAPAS model.""" - +"""Tokenization class for TAPAS model.""" import collections import datetime diff --git a/src/transformers/models/time_series_transformer/configuration_time_series_transformer.py b/src/transformers/models/time_series_transformer/configuration_time_series_transformer.py index 8c74f15174..56b06c0384 100644 --- a/src/transformers/models/time_series_transformer/configuration_time_series_transformer.py +++ b/src/transformers/models/time_series_transformer/configuration_time_series_transformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Time Series Transformer model configuration""" +"""Time Series Transformer model configuration""" from typing import List, Optional, Union diff --git a/src/transformers/models/time_series_transformer/modeling_time_series_transformer.py b/src/transformers/models/time_series_transformer/modeling_time_series_transformer.py index dd7a2228a3..b45e6d7e85 100644 --- a/src/transformers/models/time_series_transformer/modeling_time_series_transformer.py +++ b/src/transformers/models/time_series_transformer/modeling_time_series_transformer.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Time Series Transformer model.""" +"""PyTorch Time Series Transformer model.""" from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/timesformer/configuration_timesformer.py b/src/transformers/models/timesformer/configuration_timesformer.py index ebcfcc8248..2ee7125de2 100644 --- a/src/transformers/models/timesformer/configuration_timesformer.py +++ b/src/transformers/models/timesformer/configuration_timesformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TimeSformer model configuration""" +"""TimeSformer model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/timesformer/modeling_timesformer.py b/src/transformers/models/timesformer/modeling_timesformer.py index f85e945247..2262898d54 100644 --- a/src/transformers/models/timesformer/modeling_timesformer.py +++ b/src/transformers/models/timesformer/modeling_timesformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch TimeSformer model.""" - +"""PyTorch TimeSformer model.""" import collections from typing import Optional, Tuple, Union diff --git a/src/transformers/models/timm_backbone/configuration_timm_backbone.py b/src/transformers/models/timm_backbone/configuration_timm_backbone.py index 0f2f1b0b6c..6171bcce1e 100644 --- a/src/transformers/models/timm_backbone/configuration_timm_backbone.py +++ b/src/transformers/models/timm_backbone/configuration_timm_backbone.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" Configuration for Backbone models""" +"""Configuration for Backbone models""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/trocr/configuration_trocr.py b/src/transformers/models/trocr/configuration_trocr.py index efa20d884e..f47412e93a 100644 --- a/src/transformers/models/trocr/configuration_trocr.py +++ b/src/transformers/models/trocr/configuration_trocr.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TrOCR model configuration""" +"""TrOCR model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/trocr/convert_trocr_unilm_to_pytorch.py b/src/transformers/models/trocr/convert_trocr_unilm_to_pytorch.py index 428406d82c..a787932b76 100644 --- a/src/transformers/models/trocr/convert_trocr_unilm_to_pytorch.py +++ b/src/transformers/models/trocr/convert_trocr_unilm_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert TrOCR checkpoints from the unilm repository.""" - import argparse from pathlib import Path diff --git a/src/transformers/models/trocr/modeling_trocr.py b/src/transformers/models/trocr/modeling_trocr.py index 1a0766fc25..04eb40ab2a 100644 --- a/src/transformers/models/trocr/modeling_trocr.py +++ b/src/transformers/models/trocr/modeling_trocr.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch TrOCR decoder model (based on RoBERTa).""" - +"""PyTorch TrOCR decoder model (based on RoBERTa).""" import copy import math diff --git a/src/transformers/models/trocr/processing_trocr.py b/src/transformers/models/trocr/processing_trocr.py index e7ce7362d4..b0d2e823fe 100644 --- a/src/transformers/models/trocr/processing_trocr.py +++ b/src/transformers/models/trocr/processing_trocr.py @@ -15,6 +15,7 @@ """ Processor class for TrOCR. """ + import warnings from contextlib import contextmanager diff --git a/src/transformers/models/tvlt/configuration_tvlt.py b/src/transformers/models/tvlt/configuration_tvlt.py index fbf65effd9..1a1782f68c 100644 --- a/src/transformers/models/tvlt/configuration_tvlt.py +++ b/src/transformers/models/tvlt/configuration_tvlt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" TVLT model configuration""" +"""TVLT model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/tvlt/image_processing_tvlt.py b/src/transformers/models/tvlt/image_processing_tvlt.py index f13101c15a..06576a0f7e 100644 --- a/src/transformers/models/tvlt/image_processing_tvlt.py +++ b/src/transformers/models/tvlt/image_processing_tvlt.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Image processor class for TVLT.""" + from typing import Dict, List, Optional, Union import numpy as np diff --git a/src/transformers/models/tvlt/modeling_tvlt.py b/src/transformers/models/tvlt/modeling_tvlt.py index 0376570fe5..d49fef5822 100644 --- a/src/transformers/models/tvlt/modeling_tvlt.py +++ b/src/transformers/models/tvlt/modeling_tvlt.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch TVLT model.""" - +"""PyTorch TVLT model.""" import collections.abc import math diff --git a/src/transformers/models/tvp/configuration_tvp.py b/src/transformers/models/tvp/configuration_tvp.py index 65c4e3a522..dfd3a04ee7 100644 --- a/src/transformers/models/tvp/configuration_tvp.py +++ b/src/transformers/models/tvp/configuration_tvp.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND=, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TVP model configuration""" +"""TVP model configuration""" import copy diff --git a/src/transformers/models/tvp/processing_tvp.py b/src/transformers/models/tvp/processing_tvp.py index 4e27399ab8..eb8aabfdad 100644 --- a/src/transformers/models/tvp/processing_tvp.py +++ b/src/transformers/models/tvp/processing_tvp.py @@ -16,7 +16,6 @@ Processor class for TVP. """ - from ...processing_utils import ProcessorMixin from ...tokenization_utils_base import BatchEncoding diff --git a/src/transformers/models/udop/configuration_udop.py b/src/transformers/models/udop/configuration_udop.py index 3802b6de01..bc1765e289 100644 --- a/src/transformers/models/udop/configuration_udop.py +++ b/src/transformers/models/udop/configuration_udop.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" UDOP model configuration""" - +"""UDOP model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/udop/convert_udop_to_hf.py b/src/transformers/models/udop/convert_udop_to_hf.py index 7cbb2f161d..f2d54b8ca5 100644 --- a/src/transformers/models/udop/convert_udop_to_hf.py +++ b/src/transformers/models/udop/convert_udop_to_hf.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert UDOP checkpoints from the original repository. URL: https://github.com/microsoft/i-Code/tree/main/i-Code-Doc""" - import argparse import torch diff --git a/src/transformers/models/udop/modeling_udop.py b/src/transformers/models/udop/modeling_udop.py index 0a2d003a56..02e7aac88b 100644 --- a/src/transformers/models/udop/modeling_udop.py +++ b/src/transformers/models/udop/modeling_udop.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch UDOP model.""" +"""PyTorch UDOP model.""" import collections import logging diff --git a/src/transformers/models/udop/tokenization_udop.py b/src/transformers/models/udop/tokenization_udop.py index 3eaf1f5cad..704b5c48de 100644 --- a/src/transformers/models/udop/tokenization_udop.py +++ b/src/transformers/models/udop/tokenization_udop.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License -""" Tokenization classes for UDOP model.""" - +"""Tokenization classes for UDOP model.""" import os import re diff --git a/src/transformers/models/udop/tokenization_udop_fast.py b/src/transformers/models/udop/tokenization_udop_fast.py index db3b90e05c..caa96c25f4 100644 --- a/src/transformers/models/udop/tokenization_udop_fast.py +++ b/src/transformers/models/udop/tokenization_udop_fast.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License -""" Tokenization classes for UDOP model.""" - +"""Tokenization classes for UDOP model.""" import os from shutil import copyfile diff --git a/src/transformers/models/umt5/configuration_umt5.py b/src/transformers/models/umt5/configuration_umt5.py index 9365717c28..d7323d759f 100644 --- a/src/transformers/models/umt5/configuration_umt5.py +++ b/src/transformers/models/umt5/configuration_umt5.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" UMT5 model configuration""" +"""UMT5 model configuration""" + from typing import Mapping from ...configuration_utils import PretrainedConfig diff --git a/src/transformers/models/umt5/convert_umt5_checkpoint_to_pytorch.py b/src/transformers/models/umt5/convert_umt5_checkpoint_to_pytorch.py index eeb5b3eb40..848ca3c566 100644 --- a/src/transformers/models/umt5/convert_umt5_checkpoint_to_pytorch.py +++ b/src/transformers/models/umt5/convert_umt5_checkpoint_to_pytorch.py @@ -166,9 +166,9 @@ def convert_t5x_to_pytorch( if scalable_attention: # convert the rel_embedding of each layer - new[ - f"decoder.block.{i}.layer.0.SelfAttention.relative_attention_bias.weight" - ] = t5x_relpos_bias_lookup(old, i, "decoder").T + new[f"decoder.block.{i}.layer.0.SelfAttention.relative_attention_bias.weight"] = ( + t5x_relpos_bias_lookup(old, i, "decoder").T + ) new["decoder.final_layer_norm.weight"] = old["decoder/decoder_norm/scale"] diff --git a/src/transformers/models/umt5/modeling_umt5.py b/src/transformers/models/umt5/modeling_umt5.py index 1bf8469f77..3271689540 100644 --- a/src/transformers/models/umt5/modeling_umt5.py +++ b/src/transformers/models/umt5/modeling_umt5.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch UMT5 model.""" +"""PyTorch UMT5 model.""" import copy import math diff --git a/src/transformers/models/unispeech/configuration_unispeech.py b/src/transformers/models/unispeech/configuration_unispeech.py index 18502adcb0..69bc162220 100644 --- a/src/transformers/models/unispeech/configuration_unispeech.py +++ b/src/transformers/models/unispeech/configuration_unispeech.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" UniSpeech model configuration""" +"""UniSpeech model configuration""" import functools import operator diff --git a/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py index bf72930951..4eb8dfa7bb 100644 --- a/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert UniSpeech checkpoint.""" - import argparse import json import os diff --git a/src/transformers/models/unispeech/modeling_unispeech.py b/src/transformers/models/unispeech/modeling_unispeech.py index aa4bb7827e..0db34c1fc8 100755 --- a/src/transformers/models/unispeech/modeling_unispeech.py +++ b/src/transformers/models/unispeech/modeling_unispeech.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch UniSpeech model.""" +"""PyTorch UniSpeech model.""" import math import warnings diff --git a/src/transformers/models/unispeech_sat/configuration_unispeech_sat.py b/src/transformers/models/unispeech_sat/configuration_unispeech_sat.py index 87b4bc8506..85661b02b6 100644 --- a/src/transformers/models/unispeech_sat/configuration_unispeech_sat.py +++ b/src/transformers/models/unispeech_sat/configuration_unispeech_sat.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" UniSpeechSat model configuration""" +"""UniSpeechSat model configuration""" import functools import operator diff --git a/src/transformers/models/unispeech_sat/convert_unispeech_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/unispeech_sat/convert_unispeech_original_s3prl_checkpoint_to_pytorch.py index 56c9d52e18..fca35acb63 100644 --- a/src/transformers/models/unispeech_sat/convert_unispeech_original_s3prl_checkpoint_to_pytorch.py +++ b/src/transformers/models/unispeech_sat/convert_unispeech_original_s3prl_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Convert Hubert checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/unispeech_sat/convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/unispeech_sat/convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py index 93750b64cc..4a70d41dd2 100644 --- a/src/transformers/models/unispeech_sat/convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/unispeech_sat/convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert UniSpeechSat checkpoint.""" - import argparse import fairseq diff --git a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py index 4663fc05d8..b78ffb4ac1 100755 --- a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py +++ b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch UniSpeechSat model.""" +"""PyTorch UniSpeechSat model.""" import math import warnings diff --git a/src/transformers/models/univnet/configuration_univnet.py b/src/transformers/models/univnet/configuration_univnet.py index 27850e114d..0f4dceb479 100644 --- a/src/transformers/models/univnet/configuration_univnet.py +++ b/src/transformers/models/univnet/configuration_univnet.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" UnivNetModel model configuration""" +"""UnivNetModel model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/univnet/modeling_univnet.py b/src/transformers/models/univnet/modeling_univnet.py index e4fc1215c0..5b0c659c30 100644 --- a/src/transformers/models/univnet/modeling_univnet.py +++ b/src/transformers/models/univnet/modeling_univnet.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch UnivNetModel model.""" +"""PyTorch UnivNetModel model.""" from dataclasses import dataclass from typing import Optional, Tuple, Union diff --git a/src/transformers/models/upernet/configuration_upernet.py b/src/transformers/models/upernet/configuration_upernet.py index 609818c80d..8d9ba47777 100644 --- a/src/transformers/models/upernet/configuration_upernet.py +++ b/src/transformers/models/upernet/configuration_upernet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" UperNet model configuration""" - +"""UperNet model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/upernet/modeling_upernet.py b/src/transformers/models/upernet/modeling_upernet.py index 58f64995ae..4a5b915e57 100644 --- a/src/transformers/models/upernet/modeling_upernet.py +++ b/src/transformers/models/upernet/modeling_upernet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch UperNet model. Based on OpenMMLab's implementation, found in https://github.com/open-mmlab/mmsegmentation.""" +"""PyTorch UperNet model. Based on OpenMMLab's implementation, found in https://github.com/open-mmlab/mmsegmentation.""" from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/video_llava/configuration_video_llava.py b/src/transformers/models/video_llava/configuration_video_llava.py index d05f0e51e1..9fd236e595 100644 --- a/src/transformers/models/video_llava/configuration_video_llava.py +++ b/src/transformers/models/video_llava/configuration_video_llava.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" VideoLlava model configuration""" +"""VideoLlava model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/video_llava/modeling_video_llava.py b/src/transformers/models/video_llava/modeling_video_llava.py index 7fbd142fbe..1059479334 100644 --- a/src/transformers/models/video_llava/modeling_video_llava.py +++ b/src/transformers/models/video_llava/modeling_video_llava.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch VideoLlava model.""" +"""PyTorch VideoLlava model.""" + from dataclasses import dataclass from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/video_llava/processing_video_llava.py b/src/transformers/models/video_llava/processing_video_llava.py index b71600501b..e29c3478aa 100644 --- a/src/transformers/models/video_llava/processing_video_llava.py +++ b/src/transformers/models/video_llava/processing_video_llava.py @@ -16,7 +16,6 @@ Processor class for VideoLlava. """ - from typing import List, Optional, Union from ...feature_extraction_utils import BatchFeature diff --git a/src/transformers/models/videomae/configuration_videomae.py b/src/transformers/models/videomae/configuration_videomae.py index b1cfcaecfa..a6150f3433 100644 --- a/src/transformers/models/videomae/configuration_videomae.py +++ b/src/transformers/models/videomae/configuration_videomae.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" VideoMAE model configuration""" +"""VideoMAE model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/videomae/modeling_videomae.py b/src/transformers/models/videomae/modeling_videomae.py index 05f74328b4..73a680ba3a 100755 --- a/src/transformers/models/videomae/modeling_videomae.py +++ b/src/transformers/models/videomae/modeling_videomae.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch VideoMAE (masked autoencoder) model.""" - +"""PyTorch VideoMAE (masked autoencoder) model.""" import collections.abc import math diff --git a/src/transformers/models/vilt/configuration_vilt.py b/src/transformers/models/vilt/configuration_vilt.py index ef0ce550d2..a57b400682 100644 --- a/src/transformers/models/vilt/configuration_vilt.py +++ b/src/transformers/models/vilt/configuration_vilt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" VilT model configuration""" +"""VilT model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/vilt/convert_vilt_original_to_pytorch.py b/src/transformers/models/vilt/convert_vilt_original_to_pytorch.py index e597d0d7e7..79b9f3ba03 100644 --- a/src/transformers/models/vilt/convert_vilt_original_to_pytorch.py +++ b/src/transformers/models/vilt/convert_vilt_original_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert ViLT checkpoints from the original Github repository.""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/vilt/modeling_vilt.py b/src/transformers/models/vilt/modeling_vilt.py index 4395bb47b5..bb564c1ceb 100755 --- a/src/transformers/models/vilt/modeling_vilt.py +++ b/src/transformers/models/vilt/modeling_vilt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ViLT model.""" +"""PyTorch ViLT model.""" import collections.abc import math diff --git a/src/transformers/models/vipllava/configuration_vipllava.py b/src/transformers/models/vipllava/configuration_vipllava.py index e94d4be6c1..d98099b21b 100644 --- a/src/transformers/models/vipllava/configuration_vipllava.py +++ b/src/transformers/models/vipllava/configuration_vipllava.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" VipLlava model configuration""" +"""VipLlava model configuration""" import warnings diff --git a/src/transformers/models/vipllava/modeling_vipllava.py b/src/transformers/models/vipllava/modeling_vipllava.py index 02821b03a2..c5f856e787 100644 --- a/src/transformers/models/vipllava/modeling_vipllava.py +++ b/src/transformers/models/vipllava/modeling_vipllava.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch VipLlava model.""" +"""PyTorch VipLlava model.""" + from dataclasses import dataclass from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py index 987c9a1afa..1af006a352 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" Classes to support Vision-Encoder-Text-Decoder architectures""" - +"""Classes to support Vision-Encoder-Text-Decoder architectures""" import os from typing import Optional, Tuple, Union diff --git a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py index 75ff2dbd82..383dd0e3e4 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Classes to support TF Vision-Encoder-Text-Decoder architectures""" - +"""Classes to support TF Vision-Encoder-Text-Decoder architectures""" from __future__ import annotations diff --git a/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py index 1dbee5efcc..b6125fb4db 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Classes to support Vision-Encoder-Text-Decoder architectures""" - +"""Classes to support Vision-Encoder-Text-Decoder architectures""" import gc import os diff --git a/src/transformers/models/vision_text_dual_encoder/configuration_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/configuration_vision_text_dual_encoder.py index aab76f71db..a3191464ec 100644 --- a/src/transformers/models/vision_text_dual_encoder/configuration_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/configuration_vision_text_dual_encoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" VisionTextDualEncoder model configuration""" - +"""VisionTextDualEncoder model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py index ba8bf7091b..23244af1e3 100644 --- a/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Flax VisionTextDualEncoder model.""" - +"""Flax VisionTextDualEncoder model.""" from typing import Optional, Tuple diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py index 6f7e30d3f6..077b452f70 100644 --- a/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py @@ -14,7 +14,6 @@ # limitations under the License. """TensorFlow VisionTextDualEncoder model.""" - from __future__ import annotations import re diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py index 0f82bdd0c3..5b90faa886 100755 --- a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch VisionTextDualEncoder model.""" - +"""PyTorch VisionTextDualEncoder model.""" from typing import Optional, Tuple, Union diff --git a/src/transformers/models/visual_bert/configuration_visual_bert.py b/src/transformers/models/visual_bert/configuration_visual_bert.py index bb146a143a..ae98229a7d 100644 --- a/src/transformers/models/visual_bert/configuration_visual_bert.py +++ b/src/transformers/models/visual_bert/configuration_visual_bert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" VisualBERT model configuration""" +"""VisualBERT model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py index d1e95630bd..dd03623c88 100644 --- a/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert VisualBert checkpoint.""" - import argparse from collections import OrderedDict from pathlib import Path diff --git a/src/transformers/models/visual_bert/modeling_visual_bert.py b/src/transformers/models/visual_bert/modeling_visual_bert.py index f482ed8cd7..41c8709b3e 100755 --- a/src/transformers/models/visual_bert/modeling_visual_bert.py +++ b/src/transformers/models/visual_bert/modeling_visual_bert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch VisualBERT model.""" - +"""PyTorch VisualBERT model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/vit/configuration_vit.py b/src/transformers/models/vit/configuration_vit.py index 286d302c78..bacec851a9 100644 --- a/src/transformers/models/vit/configuration_vit.py +++ b/src/transformers/models/vit/configuration_vit.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ViT model configuration""" +"""ViT model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/vit/convert_dino_to_pytorch.py b/src/transformers/models/vit/convert_dino_to_pytorch.py index 7eec823ad5..8608da8eb4 100644 --- a/src/transformers/models/vit/convert_dino_to_pytorch.py +++ b/src/transformers/models/vit/convert_dino_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert ViT checkpoints trained with the DINO method.""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/vit/convert_vit_timm_to_pytorch.py b/src/transformers/models/vit/convert_vit_timm_to_pytorch.py index 0ccd9b9f66..7892842f8d 100644 --- a/src/transformers/models/vit/convert_vit_timm_to_pytorch.py +++ b/src/transformers/models/vit/convert_vit_timm_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert ViT and non-distilled DeiT checkpoints from the timm library.""" - import argparse from pathlib import Path diff --git a/src/transformers/models/vit/modeling_tf_vit.py b/src/transformers/models/vit/modeling_tf_vit.py index ac5cf691e9..2cf120df74 100644 --- a/src/transformers/models/vit/modeling_tf_vit.py +++ b/src/transformers/models/vit/modeling_tf_vit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 ViT model.""" - +"""TF 2.0 ViT model.""" from __future__ import annotations diff --git a/src/transformers/models/vit/modeling_vit.py b/src/transformers/models/vit/modeling_vit.py index dfda7bf731..7897555a6b 100644 --- a/src/transformers/models/vit/modeling_vit.py +++ b/src/transformers/models/vit/modeling_vit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ViT model.""" - +"""PyTorch ViT model.""" import collections.abc import math diff --git a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py index 2b9dcd0a81..78349af336 100644 --- a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py +++ b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" ViT Hybrid model configuration""" - +"""ViT Hybrid model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/vit_hybrid/convert_vit_hybrid_timm_to_pytorch.py b/src/transformers/models/vit_hybrid/convert_vit_hybrid_timm_to_pytorch.py index e88ee246ba..1d717d74c9 100644 --- a/src/transformers/models/vit_hybrid/convert_vit_hybrid_timm_to_pytorch.py +++ b/src/transformers/models/vit_hybrid/convert_vit_hybrid_timm_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert ViT hybrid checkpoints from the timm library.""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/vit_hybrid/modeling_vit_hybrid.py b/src/transformers/models/vit_hybrid/modeling_vit_hybrid.py index 359b5e3fb9..9c92e592c1 100644 --- a/src/transformers/models/vit_hybrid/modeling_vit_hybrid.py +++ b/src/transformers/models/vit_hybrid/modeling_vit_hybrid.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ViT Hybrid model.""" - +"""PyTorch ViT Hybrid model.""" import collections.abc import math diff --git a/src/transformers/models/vit_mae/configuration_vit_mae.py b/src/transformers/models/vit_mae/configuration_vit_mae.py index e4e46e7e42..d20b5af130 100644 --- a/src/transformers/models/vit_mae/configuration_vit_mae.py +++ b/src/transformers/models/vit_mae/configuration_vit_mae.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ViT MAE model configuration""" +"""ViT MAE model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py index fff8234e06..cc7a29b192 100644 --- a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py +++ b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 ViT MAE (masked autoencoder) model.""" - +"""TF 2.0 ViT MAE (masked autoencoder) model.""" from __future__ import annotations diff --git a/src/transformers/models/vit_mae/modeling_vit_mae.py b/src/transformers/models/vit_mae/modeling_vit_mae.py index f434efbe3e..f2afd84fc1 100755 --- a/src/transformers/models/vit_mae/modeling_vit_mae.py +++ b/src/transformers/models/vit_mae/modeling_vit_mae.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ViT MAE (masked autoencoder) model.""" - +"""PyTorch ViT MAE (masked autoencoder) model.""" import collections.abc import math diff --git a/src/transformers/models/vit_msn/configuration_vit_msn.py b/src/transformers/models/vit_msn/configuration_vit_msn.py index 14acb15d54..7cf4414f8d 100644 --- a/src/transformers/models/vit_msn/configuration_vit_msn.py +++ b/src/transformers/models/vit_msn/configuration_vit_msn.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" ViT MSN model configuration""" - +"""ViT MSN model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/vit_msn/modeling_vit_msn.py b/src/transformers/models/vit_msn/modeling_vit_msn.py index fe5a880027..c89370be5c 100644 --- a/src/transformers/models/vit_msn/modeling_vit_msn.py +++ b/src/transformers/models/vit_msn/modeling_vit_msn.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ViT MSN (masked siamese network) model.""" - +"""PyTorch ViT MSN (masked siamese network) model.""" import collections.abc import math diff --git a/src/transformers/models/vitdet/configuration_vitdet.py b/src/transformers/models/vitdet/configuration_vitdet.py index f85558c254..856f228a5b 100644 --- a/src/transformers/models/vitdet/configuration_vitdet.py +++ b/src/transformers/models/vitdet/configuration_vitdet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" VitDet model configuration""" - +"""VitDet model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/vitdet/modeling_vitdet.py b/src/transformers/models/vitdet/modeling_vitdet.py index 9dc3e476f3..edf934581c 100644 --- a/src/transformers/models/vitdet/modeling_vitdet.py +++ b/src/transformers/models/vitdet/modeling_vitdet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ViTDet backbone.""" - +"""PyTorch ViTDet backbone.""" import collections.abc import math diff --git a/src/transformers/models/vitmatte/configuration_vitmatte.py b/src/transformers/models/vitmatte/configuration_vitmatte.py index 67f562d2bd..0083bc1ef2 100644 --- a/src/transformers/models/vitmatte/configuration_vitmatte.py +++ b/src/transformers/models/vitmatte/configuration_vitmatte.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" VitMatte model configuration""" +"""VitMatte model configuration""" import copy from typing import List diff --git a/src/transformers/models/vitmatte/modeling_vitmatte.py b/src/transformers/models/vitmatte/modeling_vitmatte.py index f7bfb06d9a..fca94e7032 100644 --- a/src/transformers/models/vitmatte/modeling_vitmatte.py +++ b/src/transformers/models/vitmatte/modeling_vitmatte.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch ViTMatte model.""" +"""PyTorch ViTMatte model.""" from dataclasses import dataclass from typing import Optional, Tuple diff --git a/src/transformers/models/vits/configuration_vits.py b/src/transformers/models/vits/configuration_vits.py index 8d5ffca36f..0f2aeb8ac4 100644 --- a/src/transformers/models/vits/configuration_vits.py +++ b/src/transformers/models/vits/configuration_vits.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" VITS model configuration""" - +"""VITS model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/vits/modeling_vits.py b/src/transformers/models/vits/modeling_vits.py index 905945e01a..def1361cc0 100644 --- a/src/transformers/models/vits/modeling_vits.py +++ b/src/transformers/models/vits/modeling_vits.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch VITS model.""" +"""PyTorch VITS model.""" import math from dataclasses import dataclass diff --git a/src/transformers/models/vits/tokenization_vits.py b/src/transformers/models/vits/tokenization_vits.py index c8b115c176..4c02857483 100644 --- a/src/transformers/models/vits/tokenization_vits.py +++ b/src/transformers/models/vits/tokenization_vits.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization class for VITS.""" - import json import os import re diff --git a/src/transformers/models/vivit/configuration_vivit.py b/src/transformers/models/vivit/configuration_vivit.py index 4cbebc7692..63895e4fb7 100644 --- a/src/transformers/models/vivit/configuration_vivit.py +++ b/src/transformers/models/vivit/configuration_vivit.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" ViViT model configuration""" +"""ViViT model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/vivit/convert_vivit_flax_to_pytorch.py b/src/transformers/models/vivit/convert_vivit_flax_to_pytorch.py index 571dfe896e..c3075d6034 100644 --- a/src/transformers/models/vivit/convert_vivit_flax_to_pytorch.py +++ b/src/transformers/models/vivit/convert_vivit_flax_to_pytorch.py @@ -15,6 +15,7 @@ """Convert Flax ViViT checkpoints from the original repository to PyTorch. URL: https://github.com/google-research/scenic/tree/main/scenic/projects/vivit """ + import argparse import json import os.path diff --git a/src/transformers/models/vivit/image_processing_vivit.py b/src/transformers/models/vivit/image_processing_vivit.py index 9b62aedc23..35babdfd2c 100644 --- a/src/transformers/models/vivit/image_processing_vivit.py +++ b/src/transformers/models/vivit/image_processing_vivit.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Image processor class for Vivit.""" + from typing import Dict, List, Optional, Union import numpy as np diff --git a/src/transformers/models/vivit/modeling_vivit.py b/src/transformers/models/vivit/modeling_vivit.py index c8bac4f02e..97891bee1a 100755 --- a/src/transformers/models/vivit/modeling_vivit.py +++ b/src/transformers/models/vivit/modeling_vivit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch ViViT model.""" - +"""PyTorch ViViT model.""" import math from typing import Optional, Set, Tuple, Union diff --git a/src/transformers/models/wav2vec2/configuration_wav2vec2.py b/src/transformers/models/wav2vec2/configuration_wav2vec2.py index 1d6777efcb..b4a676ddba 100644 --- a/src/transformers/models/wav2vec2/configuration_wav2vec2.py +++ b/src/transformers/models/wav2vec2/configuration_wav2vec2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Wav2Vec2 model configuration""" +"""Wav2Vec2 model configuration""" import functools import operator diff --git a/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py index 28554691c6..0a6aff8c81 100644 --- a/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Wav2Vec2 checkpoint.""" - import argparse import json import os diff --git a/src/transformers/models/wav2vec2/convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/wav2vec2/convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py index bcc9fd95a4..1702bc5a47 100644 --- a/src/transformers/models/wav2vec2/convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py +++ b/src/transformers/models/wav2vec2/convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Hubert checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py index 86cfb5e089..9a24b9d39f 100644 --- a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax Wav2Vec2 model.""" +"""Flax Wav2Vec2 model.""" from functools import partial from typing import Optional, Tuple, Union diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py index efbae8f232..a56a2cb55e 100644 --- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" TensorFlow Wav2Vec2 model.""" - +"""TensorFlow Wav2Vec2 model.""" from __future__ import annotations diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py index 5fb64c1f2c..bdf2d4f50a 100755 --- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Wav2Vec2 model.""" +"""PyTorch Wav2Vec2 model.""" import math import warnings diff --git a/src/transformers/models/wav2vec2/processing_wav2vec2.py b/src/transformers/models/wav2vec2/processing_wav2vec2.py index dc6e9d14ee..1b47c0a988 100644 --- a/src/transformers/models/wav2vec2/processing_wav2vec2.py +++ b/src/transformers/models/wav2vec2/processing_wav2vec2.py @@ -15,6 +15,7 @@ """ Speech processor class for Wav2Vec2 """ + import warnings from contextlib import contextmanager diff --git a/src/transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py b/src/transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py index f6c364884b..20b4e4fa13 100644 --- a/src/transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py +++ b/src/transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Wav2Vec2Bert model configuration""" - +"""Wav2Vec2Bert model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/wav2vec2_bert/convert_wav2vec2_seamless_checkpoint.py b/src/transformers/models/wav2vec2_bert/convert_wav2vec2_seamless_checkpoint.py index 8b77cd71f7..6405f45470 100644 --- a/src/transformers/models/wav2vec2_bert/convert_wav2vec2_seamless_checkpoint.py +++ b/src/transformers/models/wav2vec2_bert/convert_wav2vec2_seamless_checkpoint.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Wav2Vec2Bert BERT checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py b/src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py index 077546a65e..db4e5985e6 100644 --- a/src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +++ b/src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch Wav2Vec2-BERT model.""" +"""PyTorch Wav2Vec2-BERT model.""" import math import warnings diff --git a/src/transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py b/src/transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py index ec792ce75a..d24c672007 100644 --- a/src/transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py +++ b/src/transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py @@ -15,6 +15,7 @@ """ Speech processor class for Wav2Vec2-BERT """ + import warnings from ...processing_utils import ProcessorMixin diff --git a/src/transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py b/src/transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py index 5c931342c9..05cdceeb9c 100644 --- a/src/transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py +++ b/src/transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Wav2Vec2Conformer model configuration""" +"""Wav2Vec2Conformer model configuration""" import functools import operator diff --git a/src/transformers/models/wav2vec2_conformer/convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/wav2vec2_conformer/convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py index 1a882e95ab..8c435c6cd9 100644 --- a/src/transformers/models/wav2vec2_conformer/convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/wav2vec2_conformer/convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Wav2Vec2Conformer checkpoint.""" - import argparse import json import os diff --git a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py index da8ed52faf..14e80b516c 100644 --- a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +++ b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Wav2Vec2-Conformer model.""" +"""PyTorch Wav2Vec2-Conformer model.""" import math import warnings diff --git a/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py b/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py index 8809e2c2e8..ff4704c778 100644 --- a/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py +++ b/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py @@ -78,7 +78,6 @@ class Wav2Vec2PhonemeCTCTokenizerOutput(ModelOutput): class Wav2Vec2PhonemeCTCTokenizer(PreTrainedTokenizer): - """ Constructs a Wav2Vec2PhonemeCTC tokenizer. 
diff --git a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py index 9fceb1e61a..30652d5f3a 100644 --- a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +++ b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py @@ -15,6 +15,7 @@ """ Speech processor class for Wav2Vec2 """ + import os import warnings from contextlib import contextmanager, nullcontext diff --git a/src/transformers/models/wavlm/configuration_wavlm.py b/src/transformers/models/wavlm/configuration_wavlm.py index a860475336..3faeb7ab53 100644 --- a/src/transformers/models/wavlm/configuration_wavlm.py +++ b/src/transformers/models/wavlm/configuration_wavlm.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" WavLM model configuration""" +"""WavLM model configuration""" import functools import operator diff --git a/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py index 84e3d231ea..e41ae0881d 100644 --- a/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert WavLM checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/wavlm/convert_wavlm_original_s3prl_checkpoint_to_pytorch.py b/src/transformers/models/wavlm/convert_wavlm_original_s3prl_checkpoint_to_pytorch.py index e41aa0099a..447d4db67f 100644 --- a/src/transformers/models/wavlm/convert_wavlm_original_s3prl_checkpoint_to_pytorch.py +++ b/src/transformers/models/wavlm/convert_wavlm_original_s3prl_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert Hubert checkpoint.""" - import argparse import torch diff --git a/src/transformers/models/wavlm/modeling_wavlm.py b/src/transformers/models/wavlm/modeling_wavlm.py index dd52277b9a..320959b439 100755 --- a/src/transformers/models/wavlm/modeling_wavlm.py +++ b/src/transformers/models/wavlm/modeling_wavlm.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch WavLM model.""" +"""PyTorch WavLM model.""" import math import warnings diff --git a/src/transformers/models/whisper/configuration_whisper.py b/src/transformers/models/whisper/configuration_whisper.py index c924a21c2a..e7c0f47b58 100644 --- a/src/transformers/models/whisper/configuration_whisper.py +++ b/src/transformers/models/whisper/configuration_whisper.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Whisper model configuration""" +"""Whisper model configuration""" from collections import OrderedDict from typing import TYPE_CHECKING, Any, Mapping, Optional, Union diff --git a/src/transformers/models/whisper/feature_extraction_whisper.py b/src/transformers/models/whisper/feature_extraction_whisper.py index f2d6da5660..22f31c4b2e 100644 --- a/src/transformers/models/whisper/feature_extraction_whisper.py +++ b/src/transformers/models/whisper/feature_extraction_whisper.py @@ -15,6 +15,7 @@ """ Feature extractor class for Whisper """ + from typing import List, Optional, Union import numpy as np diff --git a/src/transformers/models/whisper/modeling_flax_whisper.py b/src/transformers/models/whisper/modeling_flax_whisper.py index 14ce899ab1..8ad4ed9de2 100644 --- a/src/transformers/models/whisper/modeling_flax_whisper.py +++ b/src/transformers/models/whisper/modeling_flax_whisper.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax whisper model.""" +"""Flax whisper model.""" import math import random diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 8033bb584f..18f55dce8a 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TensorFlow Whisper model.""" - +"""TensorFlow Whisper model.""" from __future__ import annotations diff --git a/src/transformers/models/whisper/modeling_whisper.py b/src/transformers/models/whisper/modeling_whisper.py index e4fda437bf..f24da860a1 100644 --- a/src/transformers/models/whisper/modeling_whisper.py +++ b/src/transformers/models/whisper/modeling_whisper.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch Whisper model.""" +"""PyTorch Whisper model.""" + import math from typing import Optional, Tuple, Union diff --git a/src/transformers/models/whisper/processing_whisper.py b/src/transformers/models/whisper/processing_whisper.py index cae72aa0e1..f22aae143e 100644 --- a/src/transformers/models/whisper/processing_whisper.py +++ b/src/transformers/models/whisper/processing_whisper.py @@ -16,7 +16,6 @@ Speech processor class for Whisper """ - from ...processing_utils import ProcessorMixin diff --git a/src/transformers/models/whisper/tokenization_whisper.py b/src/transformers/models/whisper/tokenization_whisper.py index 0a6ad5be6e..5f7f0ae53d 100644 --- a/src/transformers/models/whisper/tokenization_whisper.py +++ b/src/transformers/models/whisper/tokenization_whisper.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Tokenization classes for Whisper.""" + import json import os import warnings diff --git a/src/transformers/models/whisper/tokenization_whisper_fast.py b/src/transformers/models/whisper/tokenization_whisper_fast.py index 54aa60839c..a9e57cca72 100644 --- a/src/transformers/models/whisper/tokenization_whisper_fast.py +++ b/src/transformers/models/whisper/tokenization_whisper_fast.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for Whisper.""" + import json import os import re diff --git a/src/transformers/models/x_clip/configuration_x_clip.py b/src/transformers/models/x_clip/configuration_x_clip.py index 757429d6df..6314616eef 100644 --- a/src/transformers/models/x_clip/configuration_x_clip.py +++ b/src/transformers/models/x_clip/configuration_x_clip.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" X-CLIP model configuration""" +"""X-CLIP model configuration""" import os from typing import Union diff --git a/src/transformers/models/x_clip/modeling_x_clip.py b/src/transformers/models/x_clip/modeling_x_clip.py index 092ea94761..21b1c88aa0 100644 --- a/src/transformers/models/x_clip/modeling_x_clip.py +++ b/src/transformers/models/x_clip/modeling_x_clip.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch X-CLIP model.""" - +"""PyTorch X-CLIP model.""" from copy import copy from dataclasses import dataclass diff --git a/src/transformers/models/xglm/configuration_xglm.py b/src/transformers/models/xglm/configuration_xglm.py index 8eebcfaee6..c5a275405d 100644 --- a/src/transformers/models/xglm/configuration_xglm.py +++ b/src/transformers/models/xglm/configuration_xglm.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" XGLM model configuration""" +"""XGLM model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/xglm/modeling_flax_xglm.py b/src/transformers/models/xglm/modeling_flax_xglm.py index d6b90a7f00..473448c66c 100644 --- a/src/transformers/models/xglm/modeling_flax_xglm.py +++ b/src/transformers/models/xglm/modeling_flax_xglm.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Flax XGLM model.""" - +"""Flax XGLM model.""" import math import random diff --git a/src/transformers/models/xglm/modeling_tf_xglm.py b/src/transformers/models/xglm/modeling_tf_xglm.py index 6b563b6657..a62396b79c 100644 --- a/src/transformers/models/xglm/modeling_tf_xglm.py +++ b/src/transformers/models/xglm/modeling_tf_xglm.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" TF 2.0 XGLM model.""" - +"""TF 2.0 XGLM model.""" from __future__ import annotations diff --git a/src/transformers/models/xglm/modeling_xglm.py b/src/transformers/models/xglm/modeling_xglm.py index 27d5fa36d7..4f16935834 100755 --- a/src/transformers/models/xglm/modeling_xglm.py +++ b/src/transformers/models/xglm/modeling_xglm.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch XGLM model.""" - +"""PyTorch XGLM model.""" import math from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/xglm/tokenization_xglm.py b/src/transformers/models/xglm/tokenization_xglm.py index 818ca163da..8713d5f129 100644 --- a/src/transformers/models/xglm/tokenization_xglm.py +++ b/src/transformers/models/xglm/tokenization_xglm.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for .""" + import os from shutil import copyfile from typing import Any, Dict, List, Optional, Tuple diff --git a/src/transformers/models/xlm/configuration_xlm.py b/src/transformers/models/xlm/configuration_xlm.py index 2f8b5d6ef2..39db12c51b 100644 --- a/src/transformers/models/xlm/configuration_xlm.py +++ b/src/transformers/models/xlm/configuration_xlm.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" XLM configuration""" +"""XLM configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py index 6f3cdf920a..71c3a1f989 100755 --- a/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert OpenAI GPT checkpoint.""" - import argparse import json diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index ff5211a27c..f03f416a08 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -13,10 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - TF 2.0 XLM model. +TF 2.0 XLM model. """ - from __future__ import annotations import itertools diff --git a/src/transformers/models/xlm/modeling_xlm.py b/src/transformers/models/xlm/modeling_xlm.py index 23f93bebb7..2803836309 100755 --- a/src/transformers/models/xlm/modeling_xlm.py +++ b/src/transformers/models/xlm/modeling_xlm.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - PyTorch XLM model. +PyTorch XLM model. """ import itertools diff --git a/src/transformers/models/xlm/tokenization_xlm.py b/src/transformers/models/xlm/tokenization_xlm.py index b39e4c2708..b20823e017 100644 --- a/src/transformers/models/xlm/tokenization_xlm.py +++ b/src/transformers/models/xlm/tokenization_xlm.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Tokenization classes for XLM.""" - import json import os import re @@ -129,8 +128,8 @@ def romanian_preprocessing(text): text = text.replace("\u0218", "S").replace("\u0219", "s") # s-comma text = text.replace("\u021a", "T").replace("\u021b", "t") # t-comma text = text.replace("\u0102", "A").replace("\u0103", "a") - text = text.replace("\u00C2", "A").replace("\u00E2", "a") - text = text.replace("\u00CE", "I").replace("\u00EE", "i") + text = text.replace("\u00c2", "A").replace("\u00e2", "a") + text = text.replace("\u00ce", "I").replace("\u00ee", "i") return text diff --git a/src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py b/src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py index bfb2a898f0..94d38242b6 100644 --- a/src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py +++ b/src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" XLM-ProphetNet model configuration""" - +"""XLM-ProphetNet model configuration""" from typing import Callable, Optional, Union diff --git a/src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py b/src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py index ccba8fde5d..669c21026e 100644 --- a/src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py +++ b/src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch XLM-ProphetNet model.""" - +"""PyTorch XLM-ProphetNet model.""" import copy import math diff --git a/src/transformers/models/xlm_roberta/configuration_xlm_roberta.py b/src/transformers/models/xlm_roberta/configuration_xlm_roberta.py index b9e348e498..100321db48 100644 --- a/src/transformers/models/xlm_roberta/configuration_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/configuration_xlm_roberta.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" XLM-RoBERTa configuration""" +"""XLM-RoBERTa configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py b/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py index c0ad2c7c7d..efc5b69667 100644 --- a/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" TF 2.0 XLM-RoBERTa model.""" - +"""TF 2.0 XLM-RoBERTa model.""" from __future__ import annotations diff --git a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py index 20300a19ce..35a06aeb91 100644 --- a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License -""" Tokenization classes for XLM-RoBERTa model.""" - +"""Tokenization classes for XLM-RoBERTa model.""" import os from shutil import copyfile diff --git a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py index f32e715154..4ad2596a6f 100644 --- a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py +++ b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License -""" Tokenization classes for XLM-RoBERTa model.""" - +"""Tokenization classes for XLM-RoBERTa model.""" import os from shutil import copyfile diff --git a/src/transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py b/src/transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py index ac7b13755c..6ee323ae76 100644 --- a/src/transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py +++ b/src/transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" XLM_ROBERTa_XL configuration""" +"""XLM_ROBERTa_XL configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/xlnet/configuration_xlnet.py b/src/transformers/models/xlnet/configuration_xlnet.py index 957a09dbf6..0a35b204f4 100644 --- a/src/transformers/models/xlnet/configuration_xlnet.py +++ b/src/transformers/models/xlnet/configuration_xlnet.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" XLNet configuration""" +"""XLNet configuration""" import warnings diff --git a/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py index 804b52b0dc..a15c5f22ad 100755 --- a/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert BERT checkpoint.""" - import argparse import os diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py index f998b19bb6..eeacebae04 100644 --- a/src/transformers/models/xlnet/modeling_tf_xlnet.py +++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py @@ -14,10 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - TF 2.0 XLNet model. +TF 2.0 XLNet model. """ - from __future__ import annotations import warnings diff --git a/src/transformers/models/xlnet/modeling_xlnet.py b/src/transformers/models/xlnet/modeling_xlnet.py index 7d04fe8cfa..5d424ebe12 100755 --- a/src/transformers/models/xlnet/modeling_xlnet.py +++ b/src/transformers/models/xlnet/modeling_xlnet.py @@ -14,8 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - PyTorch XLNet model. 
+PyTorch XLNet model. """ + import warnings from dataclasses import dataclass from typing import List, Optional, Tuple, Union diff --git a/src/transformers/models/xlnet/tokenization_xlnet.py b/src/transformers/models/xlnet/tokenization_xlnet.py index 8d87f34ba2..9d4b35775e 100644 --- a/src/transformers/models/xlnet/tokenization_xlnet.py +++ b/src/transformers/models/xlnet/tokenization_xlnet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization classes for XLNet model.""" - +"""Tokenization classes for XLNet model.""" import os import unicodedata diff --git a/src/transformers/models/xlnet/tokenization_xlnet_fast.py b/src/transformers/models/xlnet/tokenization_xlnet_fast.py index d77307e7a3..a506e8c45a 100644 --- a/src/transformers/models/xlnet/tokenization_xlnet_fast.py +++ b/src/transformers/models/xlnet/tokenization_xlnet_fast.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Tokenization classes for XLNet model.""" - +"""Tokenization classes for XLNet model.""" import os from shutil import copyfile diff --git a/src/transformers/models/xmod/configuration_xmod.py b/src/transformers/models/xmod/configuration_xmod.py index 4ca5265251..34261a0d7c 100644 --- a/src/transformers/models/xmod/configuration_xmod.py +++ b/src/transformers/models/xmod/configuration_xmod.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" X-MOD configuration""" +"""X-MOD configuration""" + from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/yolos/configuration_yolos.py b/src/transformers/models/yolos/configuration_yolos.py index 2493403b4f..d6fe04f426 100644 --- a/src/transformers/models/yolos/configuration_yolos.py +++ b/src/transformers/models/yolos/configuration_yolos.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" YOLOS model configuration""" +"""YOLOS model configuration""" from collections import OrderedDict from typing import Mapping diff --git a/src/transformers/models/yolos/convert_yolos_to_pytorch.py b/src/transformers/models/yolos/convert_yolos_to_pytorch.py index 35238151ab..6cddc60661 100644 --- a/src/transformers/models/yolos/convert_yolos_to_pytorch.py +++ b/src/transformers/models/yolos/convert_yolos_to_pytorch.py @@ -14,7 +14,6 @@ # limitations under the License. """Convert YOLOS checkpoints from the original repository. URL: https://github.com/hustvl/YOLOS""" - import argparse import json from pathlib import Path diff --git a/src/transformers/models/yolos/modeling_yolos.py b/src/transformers/models/yolos/modeling_yolos.py index faae349e6e..2acf48849a 100755 --- a/src/transformers/models/yolos/modeling_yolos.py +++ b/src/transformers/models/yolos/modeling_yolos.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch YOLOS model.""" - +"""PyTorch YOLOS model.""" import collections.abc import math diff --git a/src/transformers/models/yoso/configuration_yoso.py b/src/transformers/models/yoso/configuration_yoso.py index 906856fa5d..e353744cc7 100644 --- a/src/transformers/models/yoso/configuration_yoso.py +++ b/src/transformers/models/yoso/configuration_yoso.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" YOSO model configuration""" +"""YOSO model configuration""" from ...configuration_utils import PretrainedConfig from ...utils import logging diff --git a/src/transformers/models/yoso/modeling_yoso.py b/src/transformers/models/yoso/modeling_yoso.py index c2d327bf84..a94c6de542 100644 --- a/src/transformers/models/yoso/modeling_yoso.py +++ b/src/transformers/models/yoso/modeling_yoso.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" PyTorch YOSO model.""" - +"""PyTorch YOSO model.""" import math from pathlib import Path diff --git a/src/transformers/optimization_tf.py b/src/transformers/optimization_tf.py index 25023430ed..f27913156c 100644 --- a/src/transformers/optimization_tf.py +++ b/src/transformers/optimization_tf.py @@ -14,7 +14,6 @@ # ============================================================================== """Functions and classes related to optimization (weight updates).""" - import re from typing import Callable, List, Optional, Union diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index a8e47fb683..d76fa4dccc 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - Processing saving/loading class for common processors. +Processing saving/loading class for common processors. """ import copy diff --git a/src/transformers/quantizers/base.py b/src/transformers/quantizers/base.py index 1cfb0d5856..3ee28ada1b 100644 --- a/src/transformers/quantizers/base.py +++ b/src/transformers/quantizers/base.py @@ -212,19 +212,15 @@ class HfQuantizer(ABC): ) @abstractmethod - def _process_model_before_weight_loading(self, model, **kwargs): - ... + def _process_model_before_weight_loading(self, model, **kwargs): ... @abstractmethod - def _process_model_after_weight_loading(self, model, **kwargs): - ... + def _process_model_after_weight_loading(self, model, **kwargs): ... @property @abstractmethod - def is_serializable(self): - ... + def is_serializable(self): ... @property @abstractmethod - def is_trainable(self): - ... + def is_trainable(self): ... diff --git a/src/transformers/time_series_utils.py b/src/transformers/time_series_utils.py index 02eddd72ce..7d9716e481 100644 --- a/src/transformers/time_series_utils.py +++ b/src/transformers/time_series_utils.py @@ -16,6 +16,7 @@ """ Time series distributional output classes and utilities. 
""" + from typing import Callable, Dict, Optional, Tuple import torch diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index 8f1b15c1c1..f936bc25ad 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -13,9 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - Tokenization classes for python tokenizers. For fast tokenizers (provided by HuggingFace's tokenizers library) see - tokenization_utils_fast.py +Tokenization classes for python tokenizers. For fast tokenizers (provided by HuggingFace's tokenizers library) see +tokenization_utils_fast.py """ + import bisect import itertools import re @@ -943,12 +944,10 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase): return [0] * ((len(token_ids_1) if token_ids_1 else 0) + len(token_ids_0)) @overload - def convert_ids_to_tokens(self, ids: int, skip_special_tokens: bool = False) -> str: - ... + def convert_ids_to_tokens(self, ids: int, skip_special_tokens: bool = False) -> str: ... @overload - def convert_ids_to_tokens(self, ids: List[int], skip_special_tokens: bool = False) -> List[str]: - ... + def convert_ids_to_tokens(self, ids: List[int], skip_special_tokens: bool = False) -> List[str]: ... def convert_ids_to_tokens( self, ids: Union[int, List[int]], skip_special_tokens: bool = False diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py index 494c049a15..07e1ef3651 100644 --- a/src/transformers/tokenization_utils_fast.py +++ b/src/transformers/tokenization_utils_fast.py @@ -13,9 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. """ - Tokenization classes for fast tokenizers (provided by HuggingFace's tokenizers library). For slow (python) tokenizers - see tokenization_utils.py +Tokenization classes for fast tokenizers (provided by HuggingFace's tokenizers library). For slow (python) tokenizers +see tokenization_utils.py """ + import copy import json import os diff --git a/src/transformers/trainer_callback.py b/src/transformers/trainer_callback.py index d2570ed8ba..45ecf7c80c 100644 --- a/src/transformers/trainer_callback.py +++ b/src/transformers/trainer_callback.py @@ -15,6 +15,7 @@ """ Callbacks to use with the Trainer class and customize the training loop. """ + import copy import dataclasses import json diff --git a/src/transformers/utils/backbone_utils.py b/src/transformers/utils/backbone_utils.py index 14fcfe4a50..6f72494ad8 100644 --- a/src/transformers/utils/backbone_utils.py +++ b/src/transformers/utils/backbone_utils.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-""" Collection of utils to be used by backbones and their components.""" +"""Collection of utils to be used by backbones and their components.""" import enum import inspect diff --git a/src/transformers/utils/fx.py b/src/transformers/utils/fx.py index b19efac130..c3687c035c 100755 --- a/src/transformers/utils/fx.py +++ b/src/transformers/utils/fx.py @@ -634,9 +634,9 @@ _MANUAL_META_OVERRIDES: Dict[Callable, Callable] = { } if is_torch_greater_or_equal_than_2_0: - _MANUAL_META_OVERRIDES[ - torch.nn.functional.scaled_dot_product_attention - ] = torch_nn_functional_scaled_dot_product_attention + _MANUAL_META_OVERRIDES[torch.nn.functional.scaled_dot_product_attention] = ( + torch_nn_functional_scaled_dot_product_attention + ) class HFProxy(Proxy): diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py index ef32ecde72..173fcb352d 100644 --- a/src/transformers/utils/hub.py +++ b/src/transformers/utils/hub.py @@ -14,6 +14,7 @@ """ Hub utilities: utilities related to download and cache models """ + import json import os import re diff --git a/src/transformers/utils/logging.py b/src/transformers/utils/logging.py index 3471e5ab66..f2fbe393f7 100644 --- a/src/transformers/utils/logging.py +++ b/src/transformers/utils/logging.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Logging utilities.""" - +"""Logging utilities.""" import functools import logging diff --git a/src/transformers/utils/sentencepiece_model_pb2_new.py b/src/transformers/utils/sentencepiece_model_pb2_new.py index 4a2e29b1bd..65d03e5a28 100644 --- a/src/transformers/utils/sentencepiece_model_pb2_new.py +++ b/src/transformers/utils/sentencepiece_model_pb2_new.py @@ -2,6 +2,7 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: sentencepiece_model.proto """Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import symbol_database as _symbol_database diff --git a/tests/models/align/test_modeling_align.py b/tests/models/align/test_modeling_align.py index cf6962a3c6..f2d1655f25 100644 --- a/tests/models/align/test_modeling_align.py +++ b/tests/models/align/test_modeling_align.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ALIGN model. """ - +"""Testing suite for the PyTorch ALIGN model.""" import inspect import os diff --git a/tests/models/altclip/test_modeling_altclip.py b/tests/models/altclip/test_modeling_altclip.py index 8c5be789c0..c7c735b19f 100755 --- a/tests/models/altclip/test_modeling_altclip.py +++ b/tests/models/altclip/test_modeling_altclip.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch AltCLIP model. 
""" - +"""Testing suite for the PyTorch AltCLIP model.""" import inspect import os diff --git a/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py b/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py index ad10b99547..9ab07b5508 100644 --- a/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py +++ b/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Audio Spectrogram Transformer (AST) model. """ +"""Testing suite for the PyTorch Audio Spectrogram Transformer (AST) model.""" import inspect import unittest diff --git a/tests/models/autoformer/test_modeling_autoformer.py b/tests/models/autoformer/test_modeling_autoformer.py index 265f5dd7b7..fb00c403f6 100644 --- a/tests/models/autoformer/test_modeling_autoformer.py +++ b/tests/models/autoformer/test_modeling_autoformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Autoformer model. """ +"""Testing suite for the PyTorch Autoformer model.""" import inspect import tempfile diff --git a/tests/models/bark/test_modeling_bark.py b/tests/models/bark/test_modeling_bark.py index bfc6a4dadf..67c18288a8 100644 --- a/tests/models/bark/test_modeling_bark.py +++ b/tests/models/bark/test_modeling_bark.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Bark model. """ - +"""Testing suite for the PyTorch Bark model.""" import copy import inspect diff --git a/tests/models/bart/test_modeling_bart.py b/tests/models/bart/test_modeling_bart.py index 3804933735..9ca4db900d 100644 --- a/tests/models/bart/test_modeling_bart.py +++ b/tests/models/bart/test_modeling_bart.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch BART model. """ - +"""Testing suite for the PyTorch BART model.""" import copy import tempfile diff --git a/tests/models/beit/test_modeling_beit.py b/tests/models/beit/test_modeling_beit.py index 50287cb7bc..1010c6007d 100644 --- a/tests/models/beit/test_modeling_beit.py +++ b/tests/models/beit/test_modeling_beit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch BEiT model. 
""" - +"""Testing suite for the PyTorch BEiT model.""" import unittest diff --git a/tests/models/bert/test_tokenization_bert.py b/tests/models/bert/test_tokenization_bert.py index cf3cc1dce1..5cebf58029 100644 --- a/tests/models/bert/test_tokenization_bert.py +++ b/tests/models/bert/test_tokenization_bert.py @@ -66,14 +66,14 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text def test_full_tokenizer(self): tokenizer = self.tokenizer_class(self.vocab_file) - tokens = tokenizer.tokenize("UNwant\u00E9d,running") + tokens = tokenizer.tokenize("UNwant\u00e9d,running") self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [9, 6, 7, 12, 10, 11]) @@ -84,7 +84,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer = self.get_tokenizer() rust_tokenizer = self.get_rust_tokenizer() - sequence = "UNwant\u00E9d,running" + sequence = "UNwant\u00e9d,running" tokens = tokenizer.tokenize(sequence) rust_tokens = rust_tokenizer.tokenize(sequence) @@ -103,7 +103,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer = self.get_tokenizer(do_lower_case=True) rust_tokenizer = self.get_rust_tokenizer(do_lower_case=True) - sequence = "UNwant\u00E9d,running" + sequence = "UNwant\u00e9d,running" tokens = tokenizer.tokenize(sequence) rust_tokens = rust_tokenizer.tokenize(sequence) @@ -121,7 +121,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_chinese(self): tokenizer = BasicTokenizer() - self.assertListEqual(tokenizer.tokenize("ah\u535A\u63A8zz"), ["ah", "\u535A", "\u63A8", "zz"]) + self.assertListEqual(tokenizer.tokenize("ah\u535a\u63a8zz"), ["ah", "\u535a", "\u63a8", "zz"]) def test_basic_tokenizer_lower(self): tokenizer = BasicTokenizer(do_lower_case=True) @@ -129,7 +129,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHeLLo!how \n Are yoU? "), ["hello", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_lower_strip_accents_false(self): tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=False) @@ -137,7 +137,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hällo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["h\u00E9llo"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["h\u00e9llo"]) def test_basic_tokenizer_lower_strip_accents_true(self): tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=True) @@ -145,7 +145,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? 
"), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_lower_strip_accents_default(self): tokenizer = BasicTokenizer(do_lower_case=True) @@ -153,7 +153,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_no_lower(self): tokenizer = BasicTokenizer(do_lower_case=False) @@ -208,7 +208,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertTrue(_is_whitespace("\t")) self.assertTrue(_is_whitespace("\r")) self.assertTrue(_is_whitespace("\n")) - self.assertTrue(_is_whitespace("\u00A0")) + self.assertTrue(_is_whitespace("\u00a0")) self.assertFalse(_is_whitespace("A")) self.assertFalse(_is_whitespace("-")) diff --git a/tests/models/bert/test_tokenization_bert_tf.py b/tests/models/bert/test_tokenization_bert_tf.py index f950e7439c..0539613a10 100644 --- a/tests/models/bert/test_tokenization_bert_tf.py +++ b/tests/models/bert/test_tokenization_bert_tf.py @@ -50,7 +50,7 @@ class BertTokenizationTest(unittest.TestCase): self.test_sentences = [ "This is a straightforward English test sentence.", - "This one has some weird characters\rto\nsee\r\nif those\u00E9break things.", + "This one has some weird characters\rto\nsee\r\nif those\u00e9break things.", "Now we're going to add some Chinese: 一 二 三 一二三", "And some much more rare Chinese: 齉 堃 齉堃", "Je vais aussi écrire en français pour tester les accents", diff --git a/tests/models/big_bird/test_modeling_big_bird.py b/tests/models/big_bird/test_modeling_big_bird.py index 02af95879a..dd22eb4a6d 100644 --- a/tests/models/big_bird/test_modeling_big_bird.py +++ b/tests/models/big_bird/test_modeling_big_bird.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch BigBird model. """ - +"""Testing suite for the PyTorch BigBird model.""" import unittest diff --git a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py index 82b7cb574d..8a009aef5e 100644 --- a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py +++ b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch BigBirdPegasus model. """ - +"""Testing suite for the PyTorch BigBirdPegasus model.""" import copy import tempfile diff --git a/tests/models/biogpt/test_modeling_biogpt.py b/tests/models/biogpt/test_modeling_biogpt.py index 58dd39e86a..51e836a1f9 100644 --- a/tests/models/biogpt/test_modeling_biogpt.py +++ b/tests/models/biogpt/test_modeling_biogpt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch BioGPT model. 
""" +"""Testing suite for the PyTorch BioGPT model.""" import math import unittest diff --git a/tests/models/bit/test_modeling_bit.py b/tests/models/bit/test_modeling_bit.py index dbc4cacdeb..ec666ec973 100644 --- a/tests/models/bit/test_modeling_bit.py +++ b/tests/models/bit/test_modeling_bit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Bit model. """ - +"""Testing suite for the PyTorch Bit model.""" import unittest diff --git a/tests/models/blenderbot/test_modeling_blenderbot.py b/tests/models/blenderbot/test_modeling_blenderbot.py index 64ae71b24b..eaab22b7d5 100644 --- a/tests/models/blenderbot/test_modeling_blenderbot.py +++ b/tests/models/blenderbot/test_modeling_blenderbot.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Blenderbot model. """ +"""Testing suite for the PyTorch Blenderbot model.""" import tempfile import unittest diff --git a/tests/models/blenderbot/test_tokenization_blenderbot.py b/tests/models/blenderbot/test_tokenization_blenderbot.py index 7fbf2b7603..70f55f9448 100644 --- a/tests/models/blenderbot/test_tokenization_blenderbot.py +++ b/tests/models/blenderbot/test_tokenization_blenderbot.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tests for Blenderbot Tokenizers, including common tests for BlenderbotSmallTokenizer.""" + import unittest from transformers import BlenderbotTokenizer, BlenderbotTokenizerFast diff --git a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py index 39e953490f..8f847eb52d 100644 --- a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py +++ b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch BlenderbotSmall model. """ +"""Testing suite for the PyTorch BlenderbotSmall model.""" import tempfile import unittest diff --git a/tests/models/blenderbot_small/test_tokenization_blenderbot_small.py b/tests/models/blenderbot_small/test_tokenization_blenderbot_small.py index 369dde6739..7ee3e989fb 100644 --- a/tests/models/blenderbot_small/test_tokenization_blenderbot_small.py +++ b/tests/models/blenderbot_small/test_tokenization_blenderbot_small.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tests for the Blenderbot small tokenizer.""" + import json import os import unittest diff --git a/tests/models/blip/test_modeling_blip.py b/tests/models/blip/test_modeling_blip.py index 89404342f0..2ea4f09214 100644 --- a/tests/models/blip/test_modeling_blip.py +++ b/tests/models/blip/test_modeling_blip.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Blip model. 
""" - +"""Testing suite for the PyTorch Blip model.""" import inspect import os diff --git a/tests/models/blip/test_modeling_blip_text.py b/tests/models/blip/test_modeling_blip_text.py index 3c12a7e9ea..ea6e138b7e 100644 --- a/tests/models/blip/test_modeling_blip_text.py +++ b/tests/models/blip/test_modeling_blip_text.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Blip model. """ +"""Testing suite for the PyTorch Blip model.""" + import unittest import numpy as np diff --git a/tests/models/blip/test_modeling_tf_blip.py b/tests/models/blip/test_modeling_tf_blip.py index a35eb7a1bd..06e7d7e224 100644 --- a/tests/models/blip/test_modeling_tf_blip.py +++ b/tests/models/blip/test_modeling_tf_blip.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow Blip model. """ - +"""Testing suite for the TensorFlow Blip model.""" from __future__ import annotations diff --git a/tests/models/blip/test_modeling_tf_blip_text.py b/tests/models/blip/test_modeling_tf_blip_text.py index 7583b61b58..dff9bef82e 100644 --- a/tests/models/blip/test_modeling_tf_blip_text.py +++ b/tests/models/blip/test_modeling_tf_blip_text.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow Blip model. """ +"""Testing suite for the TensorFlow Blip model.""" + from __future__ import annotations import unittest diff --git a/tests/models/blip_2/test_modeling_blip_2.py b/tests/models/blip_2/test_modeling_blip_2.py index d2f3b2b719..f32ffea850 100644 --- a/tests/models/blip_2/test_modeling_blip_2.py +++ b/tests/models/blip_2/test_modeling_blip_2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch BLIP-2 model. """ - +"""Testing suite for the PyTorch BLIP-2 model.""" import inspect import tempfile diff --git a/tests/models/bridgetower/test_modeling_bridgetower.py b/tests/models/bridgetower/test_modeling_bridgetower.py index c9ce8f076f..b8de60ac5c 100644 --- a/tests/models/bridgetower/test_modeling_bridgetower.py +++ b/tests/models/bridgetower/test_modeling_bridgetower.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch BridgeTower model. """ +"""Testing suite for the PyTorch BridgeTower model.""" import tempfile import unittest diff --git a/tests/models/bros/test_modeling_bros.py b/tests/models/bros/test_modeling_bros.py index 4b1290ed49..14f904c3b7 100644 --- a/tests/models/bros/test_modeling_bros.py +++ b/tests/models/bros/test_modeling_bros.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Bros model. 
""" +"""Testing suite for the PyTorch Bros model.""" import copy import unittest diff --git a/tests/models/byt5/test_tokenization_byt5.py b/tests/models/byt5/test_tokenization_byt5.py index 3793241d7e..0033021ea7 100644 --- a/tests/models/byt5/test_tokenization_byt5.py +++ b/tests/models/byt5/test_tokenization_byt5.py @@ -181,7 +181,7 @@ class ByT5TokenizationTest(TokenizerTesterMixin, unittest.TestCase): # Isolate this from the other tests because we save additional tokens/etc tmpdirname = tempfile.mkdtemp() - sample_text = " He is very happy, UNwant\u00E9d,running" + sample_text = " He is very happy, UNwant\u00e9d,running" before_tokens = tokenizer.encode(sample_text, add_special_tokens=False) tokenizer.save_pretrained(tmpdirname) @@ -197,7 +197,7 @@ class ByT5TokenizationTest(TokenizerTesterMixin, unittest.TestCase): # Isolate this from the other tests because we save additional tokens/etc tmpdirname = tempfile.mkdtemp() - sample_text = " He is very happy, UNwant\u00E9d,running" + sample_text = " He is very happy, UNwant\u00e9d,running" tokenizer.add_tokens(["bim", "bambam"]) additional_special_tokens = tokenizer.additional_special_tokens additional_special_tokens.append("new_additional_special_token") diff --git a/tests/models/canine/test_modeling_canine.py b/tests/models/canine/test_modeling_canine.py index 5c342ee975..2d2dfef40e 100644 --- a/tests/models/canine/test_modeling_canine.py +++ b/tests/models/canine/test_modeling_canine.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch CANINE model. """ - +"""Testing suite for the PyTorch CANINE model.""" import unittest from typing import List, Tuple diff --git a/tests/models/canine/test_tokenization_canine.py b/tests/models/canine/test_tokenization_canine.py index ec987f6dd6..d34ac324ea 100644 --- a/tests/models/canine/test_tokenization_canine.py +++ b/tests/models/canine/test_tokenization_canine.py @@ -98,7 +98,7 @@ class CanineTokenizationTest(TokenizerTesterMixin, unittest.TestCase): # Isolate this from the other tests because we save additional tokens/etc tmpdirname = tempfile.mkdtemp() - sample_text = " He is very happy, UNwant\u00E9d,running" + sample_text = " He is very happy, UNwant\u00e9d,running" before_tokens = tokenizer.encode(sample_text, add_special_tokens=False) tokenizer.save_pretrained(tmpdirname) @@ -114,7 +114,7 @@ class CanineTokenizationTest(TokenizerTesterMixin, unittest.TestCase): # Isolate this from the other tests because we save additional tokens/etc tmpdirname = tempfile.mkdtemp() - sample_text = " He is very happy, UNwant\u00E9d,running" + sample_text = " He is very happy, UNwant\u00e9d,running" additional_special_tokens = tokenizer.additional_special_tokens diff --git a/tests/models/chinese_clip/test_modeling_chinese_clip.py b/tests/models/chinese_clip/test_modeling_chinese_clip.py index 8ee9028eca..0b8220c8ed 100644 --- a/tests/models/chinese_clip/test_modeling_chinese_clip.py +++ b/tests/models/chinese_clip/test_modeling_chinese_clip.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Chinese-CLIP model. 
""" +"""Testing suite for the PyTorch Chinese-CLIP model.""" import inspect import os diff --git a/tests/models/clap/test_modeling_clap.py b/tests/models/clap/test_modeling_clap.py index f06fabf0a2..835a06b0ce 100644 --- a/tests/models/clap/test_modeling_clap.py +++ b/tests/models/clap/test_modeling_clap.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch CLAP model. """ - +"""Testing suite for the PyTorch CLAP model.""" import inspect import os diff --git a/tests/models/clip/test_modeling_clip.py b/tests/models/clip/test_modeling_clip.py index 16c8f47b78..fa33176363 100644 --- a/tests/models/clip/test_modeling_clip.py +++ b/tests/models/clip/test_modeling_clip.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch CLIP model. """ - +"""Testing suite for the PyTorch CLIP model.""" import inspect import os diff --git a/tests/models/clip/test_modeling_tf_clip.py b/tests/models/clip/test_modeling_tf_clip.py index 4e1ec7f88e..83080db94b 100644 --- a/tests/models/clip/test_modeling_tf_clip.py +++ b/tests/models/clip/test_modeling_tf_clip.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow CLIP model. """ - +"""Testing suite for the TensorFlow CLIP model.""" from __future__ import annotations diff --git a/tests/models/clip/test_tokenization_clip.py b/tests/models/clip/test_tokenization_clip.py index ec1cbd08ac..5885f8933c 100644 --- a/tests/models/clip/test_tokenization_clip.py +++ b/tests/models/clip/test_tokenization_clip.py @@ -97,11 +97,11 @@ class CLIPTokenizationTest(TokenizerTesterMixin, unittest.TestCase): # Test that the tokenization is identical on unicode of space type spaces_unicodes = [ "\u0009", # (horizontal tab, '\t') - "\u000B", # (vertical tab) - "\u000C", # (form feed) + "\u000b", # (vertical tab) + "\u000c", # (form feed) "\u0020", # (space, ' ') - "\u200E", # (left-to-right mark):w - "\u200F", # (right-to-left mark) + "\u200e", # (left-to-right mark):w + "\u200f", # (right-to-left mark) ] for unicode_seq in spaces_unicodes: text_tokenized_s = tokenizer_s.tokenize(unicode_seq) @@ -111,11 +111,11 @@ class CLIPTokenizationTest(TokenizerTesterMixin, unittest.TestCase): # Test that the tokenization is identical on unicode of line break type line_break_unicodes = [ - "\u000A", # (line feed, '\n') + "\u000a", # (line feed, '\n') "\r\n", # (carriage return and line feed, '\r\n') - "\u000D", # (carriage return, '\r') + "\u000d", # (carriage return, '\r') "\r", # (carriage return, '\r') - "\u000D", # (carriage return, '\r') + "\u000d", # (carriage return, '\r') "\u2028", # (line separator) "\u2029", # (paragraph separator) # "\u0085", # (next line) diff --git a/tests/models/clipseg/test_modeling_clipseg.py b/tests/models/clipseg/test_modeling_clipseg.py index 6b82a9af41..9e819d3b26 100644 --- a/tests/models/clipseg/test_modeling_clipseg.py +++ b/tests/models/clipseg/test_modeling_clipseg.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch CLIPSeg model. """ - +"""Testing suite for the PyTorch CLIPSeg model.""" import inspect import os diff --git a/tests/models/clvp/test_modeling_clvp.py b/tests/models/clvp/test_modeling_clvp.py index 7d5064377f..1370c9acc8 100644 --- a/tests/models/clvp/test_modeling_clvp.py +++ b/tests/models/clvp/test_modeling_clvp.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Clvp model. """ - +"""Testing suite for the PyTorch Clvp model.""" import gc import tempfile diff --git a/tests/models/cohere/test_modeling_cohere.py b/tests/models/cohere/test_modeling_cohere.py index 07fd363724..a6fb6b39ff 100644 --- a/tests/models/cohere/test_modeling_cohere.py +++ b/tests/models/cohere/test_modeling_cohere.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Cohere model. """ +"""Testing suite for the PyTorch Cohere model.""" import unittest diff --git a/tests/models/conditional_detr/test_modeling_conditional_detr.py b/tests/models/conditional_detr/test_modeling_conditional_detr.py index a01acffafb..6fea53fc66 100644 --- a/tests/models/conditional_detr/test_modeling_conditional_detr.py +++ b/tests/models/conditional_detr/test_modeling_conditional_detr.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Conditional DETR model. """ - +"""Testing suite for the PyTorch Conditional DETR model.""" import inspect import math diff --git a/tests/models/convbert/test_modeling_convbert.py b/tests/models/convbert/test_modeling_convbert.py index 80a31c9bc2..0866f76798 100644 --- a/tests/models/convbert/test_modeling_convbert.py +++ b/tests/models/convbert/test_modeling_convbert.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ConvBERT model. """ +"""Testing suite for the PyTorch ConvBERT model.""" + import os import tempfile import unittest diff --git a/tests/models/convnext/test_modeling_convnext.py b/tests/models/convnext/test_modeling_convnext.py index 9f0789dffc..b618608f0e 100644 --- a/tests/models/convnext/test_modeling_convnext.py +++ b/tests/models/convnext/test_modeling_convnext.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ConvNext model. """ - +"""Testing suite for the PyTorch ConvNext model.""" import unittest diff --git a/tests/models/convnext/test_modeling_tf_convnext.py b/tests/models/convnext/test_modeling_tf_convnext.py index 4a06632513..ec8ea8e13d 100644 --- a/tests/models/convnext/test_modeling_tf_convnext.py +++ b/tests/models/convnext/test_modeling_tf_convnext.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow ConvNext model. """ +"""Testing suite for the TensorFlow ConvNext model.""" from __future__ import annotations diff --git a/tests/models/convnextv2/test_modeling_convnextv2.py b/tests/models/convnextv2/test_modeling_convnextv2.py index 5d78d31c3e..3201de6f93 100644 --- a/tests/models/convnextv2/test_modeling_convnextv2.py +++ b/tests/models/convnextv2/test_modeling_convnextv2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ConvNextV2 model. """ - +"""Testing suite for the PyTorch ConvNextV2 model.""" import unittest diff --git a/tests/models/convnextv2/test_modeling_tf_convnextv2.py b/tests/models/convnextv2/test_modeling_tf_convnextv2.py index 5f0bd2f29d..58b4795afd 100644 --- a/tests/models/convnextv2/test_modeling_tf_convnextv2.py +++ b/tests/models/convnextv2/test_modeling_tf_convnextv2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow ConvNext model. """ +"""Testing suite for the TensorFlow ConvNext model.""" from __future__ import annotations diff --git a/tests/models/cpmant/test_modeling_cpmant.py b/tests/models/cpmant/test_modeling_cpmant.py index 7a037becbf..5473791b85 100644 --- a/tests/models/cpmant/test_modeling_cpmant.py +++ b/tests/models/cpmant/test_modeling_cpmant.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch CPMAnt model. """ +"""Testing suite for the PyTorch CPMAnt model.""" import unittest diff --git a/tests/models/cvt/test_modeling_cvt.py b/tests/models/cvt/test_modeling_cvt.py index 8e9376de27..9facafcd77 100644 --- a/tests/models/cvt/test_modeling_cvt.py +++ b/tests/models/cvt/test_modeling_cvt.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch CvT model. """ - +"""Testing suite for the PyTorch CvT model.""" import unittest from math import floor diff --git a/tests/models/cvt/test_modeling_tf_cvt.py b/tests/models/cvt/test_modeling_tf_cvt.py index 0cae0bbcf2..211529719a 100644 --- a/tests/models/cvt/test_modeling_tf_cvt.py +++ b/tests/models/cvt/test_modeling_tf_cvt.py @@ -1,5 +1,4 @@ -""" Testing suite for the Tensorflow CvT model. """ - +"""Testing suite for the Tensorflow CvT model.""" from __future__ import annotations diff --git a/tests/models/data2vec/test_modeling_data2vec_audio.py b/tests/models/data2vec/test_modeling_data2vec_audio.py index b9e3bff346..0376edad2d 100644 --- a/tests/models/data2vec/test_modeling_data2vec_audio.py +++ b/tests/models/data2vec/test_modeling_data2vec_audio.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Data2VecAudio model. 
""" +"""Testing suite for the PyTorch Data2VecAudio model.""" import math import unittest diff --git a/tests/models/data2vec/test_modeling_data2vec_text.py b/tests/models/data2vec/test_modeling_data2vec_text.py index 5a3edaa7ad..c5e07f8e2e 100644 --- a/tests/models/data2vec/test_modeling_data2vec_text.py +++ b/tests/models/data2vec/test_modeling_data2vec_text.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Data2VecAudio model. """ +"""Testing suite for the PyTorch Data2VecAudio model.""" import unittest diff --git a/tests/models/data2vec/test_modeling_data2vec_vision.py b/tests/models/data2vec/test_modeling_data2vec_vision.py index c426a6ca7e..99cbd66fbb 100644 --- a/tests/models/data2vec/test_modeling_data2vec_vision.py +++ b/tests/models/data2vec/test_modeling_data2vec_vision.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Data2VecVision model. """ - +"""Testing suite for the PyTorch Data2VecVision model.""" import unittest diff --git a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py index bb6e0d5476..5ccbca8f0d 100644 --- a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py +++ b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow Data2VecVision model. """ +"""Testing suite for the TensorFlow Data2VecVision model.""" from __future__ import annotations diff --git a/tests/models/dbrx/test_modeling_dbrx.py b/tests/models/dbrx/test_modeling_dbrx.py index 4c6b74a4d7..66c9600084 100644 --- a/tests/models/dbrx/test_modeling_dbrx.py +++ b/tests/models/dbrx/test_modeling_dbrx.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch DBRX model. """ - +"""Testing suite for the PyTorch DBRX model.""" import unittest diff --git a/tests/models/decision_transformer/test_modeling_decision_transformer.py b/tests/models/decision_transformer/test_modeling_decision_transformer.py index f7f362dce8..5d69bbd0b2 100644 --- a/tests/models/decision_transformer/test_modeling_decision_transformer.py +++ b/tests/models/decision_transformer/test_modeling_decision_transformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch DecisionTransformer model. 
""" - +"""Testing suite for the PyTorch DecisionTransformer model.""" import inspect import unittest diff --git a/tests/models/deformable_detr/test_modeling_deformable_detr.py b/tests/models/deformable_detr/test_modeling_deformable_detr.py index 5831d338dc..2ae3e3f088 100644 --- a/tests/models/deformable_detr/test_modeling_deformable_detr.py +++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Deformable DETR model. """ - +"""Testing suite for the PyTorch Deformable DETR model.""" import inspect import math diff --git a/tests/models/deit/test_modeling_deit.py b/tests/models/deit/test_modeling_deit.py index adc7154a50..f558ee09a5 100644 --- a/tests/models/deit/test_modeling_deit.py +++ b/tests/models/deit/test_modeling_deit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch DeiT model. """ - +"""Testing suite for the PyTorch DeiT model.""" import unittest import warnings diff --git a/tests/models/deit/test_modeling_tf_deit.py b/tests/models/deit/test_modeling_tf_deit.py index c26635cef6..fefd506800 100644 --- a/tests/models/deit/test_modeling_tf_deit.py +++ b/tests/models/deit/test_modeling_tf_deit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow DeiT model. """ - +"""Testing suite for the TensorFlow DeiT model.""" from __future__ import annotations diff --git a/tests/models/depth_anything/test_modeling_depth_anything.py b/tests/models/depth_anything/test_modeling_depth_anything.py index 3b807abf71..ef1326520a 100644 --- a/tests/models/depth_anything/test_modeling_depth_anything.py +++ b/tests/models/depth_anything/test_modeling_depth_anything.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Depth Anything model. """ - +"""Testing suite for the PyTorch Depth Anything model.""" import unittest diff --git a/tests/models/deta/test_modeling_deta.py b/tests/models/deta/test_modeling_deta.py index faab773efd..fa840212a5 100644 --- a/tests/models/deta/test_modeling_deta.py +++ b/tests/models/deta/test_modeling_deta.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch DETA model. """ - +"""Testing suite for the PyTorch DETA model.""" import collections import inspect diff --git a/tests/models/detr/test_modeling_detr.py b/tests/models/detr/test_modeling_detr.py index ee1af9ed9d..f6277cced3 100644 --- a/tests/models/detr/test_modeling_detr.py +++ b/tests/models/detr/test_modeling_detr.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch DETR model. 
""" - +"""Testing suite for the PyTorch DETR model.""" import inspect import math diff --git a/tests/models/dinat/test_modeling_dinat.py b/tests/models/dinat/test_modeling_dinat.py index 158ce77395..2fe9de14b9 100644 --- a/tests/models/dinat/test_modeling_dinat.py +++ b/tests/models/dinat/test_modeling_dinat.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Dinat model. """ +"""Testing suite for the PyTorch Dinat model.""" import collections import unittest diff --git a/tests/models/dinov2/test_modeling_dinov2.py b/tests/models/dinov2/test_modeling_dinov2.py index 9896f2c2bb..5bf2d51b2d 100644 --- a/tests/models/dinov2/test_modeling_dinov2.py +++ b/tests/models/dinov2/test_modeling_dinov2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Dinov2 model. """ - +"""Testing suite for the PyTorch Dinov2 model.""" import unittest diff --git a/tests/models/donut/test_modeling_donut_swin.py b/tests/models/donut/test_modeling_donut_swin.py index 4d9be165bb..8b86dffab1 100644 --- a/tests/models/donut/test_modeling_donut_swin.py +++ b/tests/models/donut/test_modeling_donut_swin.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Donut Swin model. """ +"""Testing suite for the PyTorch Donut Swin model.""" import collections import unittest diff --git a/tests/models/dpt/test_modeling_dpt.py b/tests/models/dpt/test_modeling_dpt.py index a49f8d5d94..8c6231bc1c 100644 --- a/tests/models/dpt/test_modeling_dpt.py +++ b/tests/models/dpt/test_modeling_dpt.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch DPT model. """ - +"""Testing suite for the PyTorch DPT model.""" import unittest diff --git a/tests/models/dpt/test_modeling_dpt_auto_backbone.py b/tests/models/dpt/test_modeling_dpt_auto_backbone.py index 01d5398edd..2b37b816f4 100644 --- a/tests/models/dpt/test_modeling_dpt_auto_backbone.py +++ b/tests/models/dpt/test_modeling_dpt_auto_backbone.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch DPT model. """ - +"""Testing suite for the PyTorch DPT model.""" import unittest diff --git a/tests/models/dpt/test_modeling_dpt_hybrid.py b/tests/models/dpt/test_modeling_dpt_hybrid.py index a63e736e41..2caa8d71ae 100644 --- a/tests/models/dpt/test_modeling_dpt_hybrid.py +++ b/tests/models/dpt/test_modeling_dpt_hybrid.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch DPT model. 
""" - +"""Testing suite for the PyTorch DPT model.""" import unittest diff --git a/tests/models/efficientformer/test_modeling_efficientformer.py b/tests/models/efficientformer/test_modeling_efficientformer.py index 15a4cb0be3..6b7ce810ce 100644 --- a/tests/models/efficientformer/test_modeling_efficientformer.py +++ b/tests/models/efficientformer/test_modeling_efficientformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch EfficientFormer model. """ - +"""Testing suite for the PyTorch EfficientFormer model.""" import unittest import warnings diff --git a/tests/models/efficientformer/test_modeling_tf_efficientformer.py b/tests/models/efficientformer/test_modeling_tf_efficientformer.py index fcd6958ed3..abb0878740 100644 --- a/tests/models/efficientformer/test_modeling_tf_efficientformer.py +++ b/tests/models/efficientformer/test_modeling_tf_efficientformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow EfficientFormer model. """ +"""Testing suite for the TensorFlow EfficientFormer model.""" import inspect import unittest diff --git a/tests/models/efficientnet/test_modeling_efficientnet.py b/tests/models/efficientnet/test_modeling_efficientnet.py index dbca9b31a2..eaf0f4ec87 100644 --- a/tests/models/efficientnet/test_modeling_efficientnet.py +++ b/tests/models/efficientnet/test_modeling_efficientnet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch EfficientNet model. 
""" - +"""Testing suite for the PyTorch EfficientNet model.""" import unittest diff --git a/tests/models/electra/test_tokenization_electra.py b/tests/models/electra/test_tokenization_electra.py index 64611cb09c..f3648e9863 100644 --- a/tests/models/electra/test_tokenization_electra.py +++ b/tests/models/electra/test_tokenization_electra.py @@ -65,14 +65,14 @@ class ElectraTokenizationTest(TokenizerTesterMixin, unittest.TestCase): vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text def test_full_tokenizer(self): tokenizer = self.tokenizer_class(self.vocab_file) - tokens = tokenizer.tokenize("UNwant\u00E9d,running") + tokens = tokenizer.tokenize("UNwant\u00e9d,running") self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [9, 6, 7, 12, 10, 11]) @@ -83,7 +83,7 @@ class ElectraTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer = self.get_tokenizer() rust_tokenizer = self.get_rust_tokenizer() - sequence = "UNwant\u00E9d,running" + sequence = "UNwant\u00e9d,running" tokens = tokenizer.tokenize(sequence) rust_tokens = rust_tokenizer.tokenize(sequence) @@ -102,7 +102,7 @@ class ElectraTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer = self.get_tokenizer(do_lower_case=True) rust_tokenizer = self.get_rust_tokenizer(do_lower_case=True) - sequence = "UNwant\u00E9d,running" + sequence = "UNwant\u00e9d,running" tokens = tokenizer.tokenize(sequence) rust_tokens = rust_tokenizer.tokenize(sequence) @@ -120,7 +120,7 @@ class ElectraTokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_chinese(self): tokenizer = BasicTokenizer() - self.assertListEqual(tokenizer.tokenize("ah\u535A\u63A8zz"), ["ah", "\u535A", "\u63A8", "zz"]) + self.assertListEqual(tokenizer.tokenize("ah\u535a\u63a8zz"), ["ah", "\u535a", "\u63a8", "zz"]) def test_basic_tokenizer_lower(self): tokenizer = BasicTokenizer(do_lower_case=True) @@ -128,7 +128,7 @@ class ElectraTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHeLLo!how \n Are yoU? "), ["hello", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_lower_strip_accents_false(self): tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=False) @@ -136,7 +136,7 @@ class ElectraTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hällo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["h\u00E9llo"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["h\u00e9llo"]) def test_basic_tokenizer_lower_strip_accents_true(self): tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=True) @@ -144,7 +144,7 @@ class ElectraTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? 
"), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_lower_strip_accents_default(self): tokenizer = BasicTokenizer(do_lower_case=True) @@ -152,7 +152,7 @@ class ElectraTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_no_lower(self): tokenizer = BasicTokenizer(do_lower_case=False) @@ -201,7 +201,7 @@ class ElectraTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertTrue(_is_whitespace("\t")) self.assertTrue(_is_whitespace("\r")) self.assertTrue(_is_whitespace("\n")) - self.assertTrue(_is_whitespace("\u00A0")) + self.assertTrue(_is_whitespace("\u00a0")) self.assertFalse(_is_whitespace("A")) self.assertFalse(_is_whitespace("-")) diff --git a/tests/models/encodec/test_modeling_encodec.py b/tests/models/encodec/test_modeling_encodec.py index dd07f5a6e2..c2974943e0 100644 --- a/tests/models/encodec/test_modeling_encodec.py +++ b/tests/models/encodec/test_modeling_encodec.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Encodec model. """ +"""Testing suite for the PyTorch Encodec model.""" import copy import inspect diff --git a/tests/models/ernie_m/test_modeling_ernie_m.py b/tests/models/ernie_m/test_modeling_ernie_m.py index e429a12e6e..17c9aa89f3 100644 --- a/tests/models/ernie_m/test_modeling_ernie_m.py +++ b/tests/models/ernie_m/test_modeling_ernie_m.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ErnieM model. """ - +"""Testing suite for the PyTorch ErnieM model.""" import unittest diff --git a/tests/models/ernie_m/test_tokenization_ernie_m.py b/tests/models/ernie_m/test_tokenization_ernie_m.py index 5cc5ec6991..01de7d3731 100644 --- a/tests/models/ernie_m/test_tokenization_ernie_m.py +++ b/tests/models/ernie_m/test_tokenization_ernie_m.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ErnieM model. """ +"""Testing suite for the PyTorch ErnieM model.""" import unittest diff --git a/tests/models/esm/test_modeling_esm.py b/tests/models/esm/test_modeling_esm.py index db3ccd6fd2..e8e72184b3 100644 --- a/tests/models/esm/test_modeling_esm.py +++ b/tests/models/esm/test_modeling_esm.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ESM model. 
""" - +"""Testing suite for the PyTorch ESM model.""" import unittest diff --git a/tests/models/esm/test_modeling_esmfold.py b/tests/models/esm/test_modeling_esmfold.py index 1ec5ab8acb..11306e7368 100644 --- a/tests/models/esm/test_modeling_esmfold.py +++ b/tests/models/esm/test_modeling_esmfold.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ESM model. """ - +"""Testing suite for the PyTorch ESM model.""" import unittest diff --git a/tests/models/falcon/test_modeling_falcon.py b/tests/models/falcon/test_modeling_falcon.py index 59ab316140..50e8fcdbb4 100644 --- a/tests/models/falcon/test_modeling_falcon.py +++ b/tests/models/falcon/test_modeling_falcon.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Falcon model. """ - +"""Testing suite for the PyTorch Falcon model.""" import tempfile import unittest diff --git a/tests/models/fastspeech2_conformer/test_modeling_fastspeech2_conformer.py b/tests/models/fastspeech2_conformer/test_modeling_fastspeech2_conformer.py index 4cf104e693..cf57ee079a 100644 --- a/tests/models/fastspeech2_conformer/test_modeling_fastspeech2_conformer.py +++ b/tests/models/fastspeech2_conformer/test_modeling_fastspeech2_conformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch FastSpeech2Conformer model.""" +"""Testing suite for the PyTorch FastSpeech2Conformer model.""" import inspect import tempfile diff --git a/tests/models/flava/test_modeling_flava.py b/tests/models/flava/test_modeling_flava.py index 2b628e1413..1287a297a4 100644 --- a/tests/models/flava/test_modeling_flava.py +++ b/tests/models/flava/test_modeling_flava.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch FLAVA model. """ - +"""Testing suite for the PyTorch FLAVA model.""" import inspect import os diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index 5941e2006c..8686c60ab6 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch FNet model. """ - +"""Testing suite for the PyTorch FNet model.""" import unittest from typing import Dict, List, Tuple diff --git a/tests/models/focalnet/test_modeling_focalnet.py b/tests/models/focalnet/test_modeling_focalnet.py index fb2bb1c2c1..2cd29fc2c3 100644 --- a/tests/models/focalnet/test_modeling_focalnet.py +++ b/tests/models/focalnet/test_modeling_focalnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch FocalNet model. 
""" +"""Testing suite for the PyTorch FocalNet model.""" import collections import unittest diff --git a/tests/models/funnel/test_tokenization_funnel.py b/tests/models/funnel/test_tokenization_funnel.py index 7628582e9f..9ddb3b325d 100644 --- a/tests/models/funnel/test_tokenization_funnel.py +++ b/tests/models/funnel/test_tokenization_funnel.py @@ -61,23 +61,23 @@ class FunnelTokenizationTest(TokenizerTesterMixin, unittest.TestCase): return FunnelTokenizerFast.from_pretrained(self.tmpdirname, **kwargs) def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text def test_full_tokenizer(self): tokenizer = self.tokenizer_class(self.vocab_file) - tokens = tokenizer.tokenize("UNwant\u00E9d,running") + tokens = tokenizer.tokenize("UNwant\u00e9d,running") self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9]) def test_token_type_ids(self): tokenizers = self.get_tokenizers(do_lower_case=False) for tokenizer in tokenizers: - inputs = tokenizer("UNwant\u00E9d,running") + inputs = tokenizer("UNwant\u00e9d,running") sentence_len = len(inputs["input_ids"]) - 1 self.assertListEqual(inputs["token_type_ids"], [2] + [0] * sentence_len) - inputs = tokenizer("UNwant\u00E9d,running", "UNwant\u00E9d,running") + inputs = tokenizer("UNwant\u00e9d,running", "UNwant\u00e9d,running") self.assertListEqual(inputs["token_type_ids"], [2] + [0] * sentence_len + [1] * sentence_len) diff --git a/tests/models/fuyu/test_modeling_fuyu.py b/tests/models/fuyu/test_modeling_fuyu.py index 84c9128892..f65498af33 100644 --- a/tests/models/fuyu/test_modeling_fuyu.py +++ b/tests/models/fuyu/test_modeling_fuyu.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Fuyu model. """ +"""Testing suite for the PyTorch Fuyu model.""" import io import unittest diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py index fa0db9ee78..6aeb5f23c3 100644 --- a/tests/models/gemma/test_modeling_gemma.py +++ b/tests/models/gemma/test_modeling_gemma.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Gemma model. """ +"""Testing suite for the PyTorch Gemma model.""" + import tempfile import unittest diff --git a/tests/models/glpn/test_modeling_glpn.py b/tests/models/glpn/test_modeling_glpn.py index b733164ec1..2e1486039e 100644 --- a/tests/models/glpn/test_modeling_glpn.py +++ b/tests/models/glpn/test_modeling_glpn.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch GLPN model. 
""" - +"""Testing suite for the PyTorch GLPN model.""" import unittest diff --git a/tests/models/gpt2/test_tokenization_gpt2_tf.py b/tests/models/gpt2/test_tokenization_gpt2_tf.py index 0cea50db31..06f16c36e3 100644 --- a/tests/models/gpt2/test_tokenization_gpt2_tf.py +++ b/tests/models/gpt2/test_tokenization_gpt2_tf.py @@ -55,7 +55,7 @@ class GPTTokenizationTest(unittest.TestCase): self.test_sentences = [ "This is a straightforward English test sentence.", - "This one has some weird characters\rto\nsee\r\nif those\u00E9break things.", + "This one has some weird characters\rto\nsee\r\nif those\u00e9break things.", "Now we're going to add some Chinese: 一 二 三 一二三", "And some much more rare Chinese: 齉 堃 齉堃", "Je vais aussi écrire en français pour tester les accents", diff --git a/tests/models/gpt_neo/test_modeling_gpt_neo.py b/tests/models/gpt_neo/test_modeling_gpt_neo.py index ce0aeadf16..5f4fd4d119 100644 --- a/tests/models/gpt_neo/test_modeling_gpt_neo.py +++ b/tests/models/gpt_neo/test_modeling_gpt_neo.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch GPT Neo model. """ - +"""Testing suite for the PyTorch GPT Neo model.""" import unittest diff --git a/tests/models/gpt_neox/test_modeling_gpt_neox.py b/tests/models/gpt_neox/test_modeling_gpt_neox.py index 92d130b351..ed5bcac55e 100644 --- a/tests/models/gpt_neox/test_modeling_gpt_neox.py +++ b/tests/models/gpt_neox/test_modeling_gpt_neox.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch GPTNeoX model. """ - +"""Testing suite for the PyTorch GPTNeoX model.""" import unittest diff --git a/tests/models/gpt_neox_japanese/test_modeling_gpt_neox_japanese.py b/tests/models/gpt_neox_japanese/test_modeling_gpt_neox_japanese.py index 752b7a0b8d..52e9d5d5b1 100644 --- a/tests/models/gpt_neox_japanese/test_modeling_gpt_neox_japanese.py +++ b/tests/models/gpt_neox_japanese/test_modeling_gpt_neox_japanese.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch GPTNeoXJapanese model. """ - +"""Testing suite for the PyTorch GPTNeoXJapanese model.""" import unittest diff --git a/tests/models/graphormer/test_modeling_graphormer.py b/tests/models/graphormer/test_modeling_graphormer.py index ddb72543f5..55b1ccc34a 100644 --- a/tests/models/graphormer/test_modeling_graphormer.py +++ b/tests/models/graphormer/test_modeling_graphormer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Graphormer model. 
""" - +"""Testing suite for the PyTorch Graphormer model.""" import copy import inspect diff --git a/tests/models/grounding_dino/test_modeling_grounding_dino.py b/tests/models/grounding_dino/test_modeling_grounding_dino.py index 1231baff7c..12f80260cb 100644 --- a/tests/models/grounding_dino/test_modeling_grounding_dino.py +++ b/tests/models/grounding_dino/test_modeling_grounding_dino.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Grounding DINO model. """ +"""Testing suite for the PyTorch Grounding DINO model.""" import collections import inspect diff --git a/tests/models/groupvit/test_modeling_groupvit.py b/tests/models/groupvit/test_modeling_groupvit.py index 5ec9bbbf1a..15e7ad29fc 100644 --- a/tests/models/groupvit/test_modeling_groupvit.py +++ b/tests/models/groupvit/test_modeling_groupvit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch GroupViT model. """ - +"""Testing suite for the PyTorch GroupViT model.""" import inspect import os diff --git a/tests/models/groupvit/test_modeling_tf_groupvit.py b/tests/models/groupvit/test_modeling_tf_groupvit.py index be5ff803d9..3389735734 100644 --- a/tests/models/groupvit/test_modeling_tf_groupvit.py +++ b/tests/models/groupvit/test_modeling_tf_groupvit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow GroupViT model. """ - +"""Testing suite for the TensorFlow GroupViT model.""" from __future__ import annotations diff --git a/tests/models/hubert/test_modeling_hubert.py b/tests/models/hubert/test_modeling_hubert.py index d1a0558b4e..3e54e7d2db 100644 --- a/tests/models/hubert/test_modeling_hubert.py +++ b/tests/models/hubert/test_modeling_hubert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Hubert model. """ - +"""Testing suite for the PyTorch Hubert model.""" import math import os diff --git a/tests/models/idefics/test_modeling_idefics.py b/tests/models/idefics/test_modeling_idefics.py index 5c3d45d2e8..91a9f66166 100644 --- a/tests/models/idefics/test_modeling_idefics.py +++ b/tests/models/idefics/test_modeling_idefics.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Idefics model. """ +"""Testing suite for the PyTorch Idefics model.""" import unittest diff --git a/tests/models/idefics/test_modeling_tf_idefics.py b/tests/models/idefics/test_modeling_tf_idefics.py index eeb3faafa2..ffc4e586ba 100644 --- a/tests/models/idefics/test_modeling_tf_idefics.py +++ b/tests/models/idefics/test_modeling_tf_idefics.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Testing suite for the TF Idefics model. """ +"""Testing suite for the TF Idefics model.""" import os import tempfile diff --git a/tests/models/informer/test_modeling_informer.py b/tests/models/informer/test_modeling_informer.py index d932e68b3c..d81711a8ec 100644 --- a/tests/models/informer/test_modeling_informer.py +++ b/tests/models/informer/test_modeling_informer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Informer model. """ +"""Testing suite for the PyTorch Informer model.""" import inspect import tempfile diff --git a/tests/models/instructblip/test_modeling_instructblip.py b/tests/models/instructblip/test_modeling_instructblip.py index 86aea876fa..e037b5bbe3 100644 --- a/tests/models/instructblip/test_modeling_instructblip.py +++ b/tests/models/instructblip/test_modeling_instructblip.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch InstructBLIP model. """ - +"""Testing suite for the PyTorch InstructBLIP model.""" import inspect import tempfile diff --git a/tests/models/jamba/test_modeling_jamba.py b/tests/models/jamba/test_modeling_jamba.py index 0fe515e516..13208d54f1 100644 --- a/tests/models/jamba/test_modeling_jamba.py +++ b/tests/models/jamba/test_modeling_jamba.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Jamba model. """ +"""Testing suite for the PyTorch Jamba model.""" + import math import tempfile import unittest diff --git a/tests/models/kosmos2/test_modeling_kosmos2.py b/tests/models/kosmos2/test_modeling_kosmos2.py index ca944d0df0..66f070ed46 100644 --- a/tests/models/kosmos2/test_modeling_kosmos2.py +++ b/tests/models/kosmos2/test_modeling_kosmos2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch KOSMOS-2 model. 
""" - +"""Testing suite for the PyTorch KOSMOS-2 model.""" import copy import inspect diff --git a/tests/models/layoutlm/test_tokenization_layoutlm.py b/tests/models/layoutlm/test_tokenization_layoutlm.py index a34811a90b..3ddd6e7660 100644 --- a/tests/models/layoutlm/test_tokenization_layoutlm.py +++ b/tests/models/layoutlm/test_tokenization_layoutlm.py @@ -58,14 +58,14 @@ class LayoutLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): return LayoutLMTokenizer.from_pretrained(self.tmpdirname, **kwargs) def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text def test_full_tokenizer(self): tokenizer = self.tokenizer_class(self.vocab_file) - tokens = tokenizer.tokenize("UNwant\u00E9d,running") + tokens = tokenizer.tokenize("UNwant\u00e9d,running") self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9]) diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py index d70721e04c..284ba82e3f 100644 --- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch LayoutLMv2 model. """ - +"""Testing suite for the PyTorch LayoutLMv2 model.""" import unittest diff --git a/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py b/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py index ce6bbd0f01..9f9a86a999 100644 --- a/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py @@ -126,14 +126,14 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text def test_chinese(self): tokenizer = BasicTokenizer() - self.assertListEqual(tokenizer.tokenize("ah\u535A\u63A8zz"), ["ah", "\u535A", "\u63A8", "zz"]) + self.assertListEqual(tokenizer.tokenize("ah\u535a\u63a8zz"), ["ah", "\u535a", "\u63a8", "zz"]) def test_basic_tokenizer_lower(self): tokenizer = BasicTokenizer(do_lower_case=True) @@ -141,7 +141,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHeLLo!how \n Are yoU? "), ["hello", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_lower_strip_accents_false(self): tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=False) @@ -149,7 +149,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? 
"), ["hällo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["h\u00E9llo"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["h\u00e9llo"]) def test_basic_tokenizer_lower_strip_accents_true(self): tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=True) @@ -157,7 +157,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_lower_strip_accents_default(self): tokenizer = BasicTokenizer(do_lower_case=True) @@ -165,7 +165,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_no_lower(self): tokenizer = BasicTokenizer(do_lower_case=False) @@ -218,7 +218,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertTrue(_is_whitespace("\t")) self.assertTrue(_is_whitespace("\r")) self.assertTrue(_is_whitespace("\n")) - self.assertTrue(_is_whitespace("\u00A0")) + self.assertTrue(_is_whitespace("\u00a0")) self.assertFalse(_is_whitespace("A")) self.assertFalse(_is_whitespace("-")) diff --git a/tests/models/layoutlmv3/test_modeling_layoutlmv3.py b/tests/models/layoutlmv3/test_modeling_layoutlmv3.py index f280633c6a..6cb93d8b42 100644 --- a/tests/models/layoutlmv3/test_modeling_layoutlmv3.py +++ b/tests/models/layoutlmv3/test_modeling_layoutlmv3.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch LayoutLMv3 model. """ +"""Testing suite for the PyTorch LayoutLMv3 model.""" import copy import unittest diff --git a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py index 6ae2e5090e..f473eb7580 100644 --- a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py +++ b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow LayoutLMv3 model. 
""" +"""Testing suite for the TensorFlow LayoutLMv3 model.""" from __future__ import annotations diff --git a/tests/models/layoutxlm/test_tokenization_layoutxlm.py b/tests/models/layoutxlm/test_tokenization_layoutxlm.py index 8f1d353efd..bf8e6be498 100644 --- a/tests/models/layoutxlm/test_tokenization_layoutxlm.py +++ b/tests/models/layoutxlm/test_tokenization_layoutxlm.py @@ -103,7 +103,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer.save_pretrained(self.tmpdirname) def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text diff --git a/tests/models/led/test_modeling_led.py b/tests/models/led/test_modeling_led.py index 10d944c496..5b459bfd11 100644 --- a/tests/models/led/test_modeling_led.py +++ b/tests/models/led/test_modeling_led.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch LED model. """ - +"""Testing suite for the PyTorch LED model.""" import copy import tempfile diff --git a/tests/models/levit/test_modeling_levit.py b/tests/models/levit/test_modeling_levit.py index 38c9c88594..6303e0365f 100644 --- a/tests/models/levit/test_modeling_levit.py +++ b/tests/models/levit/test_modeling_levit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch LeViT model. """ - +"""Testing suite for the PyTorch LeViT model.""" import unittest import warnings diff --git a/tests/models/longformer/test_tokenization_longformer.py b/tests/models/longformer/test_tokenization_longformer.py index 1d1eda3380..7ed35f8361 100644 --- a/tests/models/longformer/test_tokenization_longformer.py +++ b/tests/models/longformer/test_tokenization_longformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the Longformer tokenizer. """ - +"""Testing suite for the Longformer tokenizer.""" import itertools import json diff --git a/tests/models/luke/test_modeling_luke.py b/tests/models/luke/test_modeling_luke.py index a35d5ec3dc..a9a5d4f938 100644 --- a/tests/models/luke/test_modeling_luke.py +++ b/tests/models/luke/test_modeling_luke.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch LUKE model. 
""" +"""Testing suite for the PyTorch LUKE model.""" + import unittest from transformers import LukeConfig, is_torch_available diff --git a/tests/models/lxmert/test_tokenization_lxmert.py b/tests/models/lxmert/test_tokenization_lxmert.py index 716e3b971f..9500416c27 100644 --- a/tests/models/lxmert/test_tokenization_lxmert.py +++ b/tests/models/lxmert/test_tokenization_lxmert.py @@ -55,14 +55,14 @@ class LxmertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text def test_full_tokenizer(self): tokenizer = self.tokenizer_class(self.vocab_file) - tokens = tokenizer.tokenize("UNwant\u00E9d,running") + tokens = tokenizer.tokenize("UNwant\u00e9d,running") self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9]) diff --git a/tests/models/m2m_100/test_modeling_m2m_100.py b/tests/models/m2m_100/test_modeling_m2m_100.py index c280f698ba..412ece863f 100644 --- a/tests/models/m2m_100/test_modeling_m2m_100.py +++ b/tests/models/m2m_100/test_modeling_m2m_100.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch M2M100 model. """ - +"""Testing suite for the PyTorch M2M100 model.""" import copy import tempfile diff --git a/tests/models/marian/test_modeling_marian.py b/tests/models/marian/test_modeling_marian.py index 3144dd48da..89a7358cca 100644 --- a/tests/models/marian/test_modeling_marian.py +++ b/tests/models/marian/test_modeling_marian.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Marian model. """ +"""Testing suite for the PyTorch Marian model.""" import tempfile import unittest diff --git a/tests/models/markuplm/test_tokenization_markuplm.py b/tests/models/markuplm/test_tokenization_markuplm.py index 370b1c5692..5d74f88fe5 100644 --- a/tests/models/markuplm/test_tokenization_markuplm.py +++ b/tests/models/markuplm/test_tokenization_markuplm.py @@ -106,7 +106,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): pass def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text diff --git a/tests/models/mask2former/test_modeling_mask2former.py b/tests/models/mask2former/test_modeling_mask2former.py index d4167cfffe..100cbafa05 100644 --- a/tests/models/mask2former/test_modeling_mask2former.py +++ b/tests/models/mask2former/test_modeling_mask2former.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Mask2Former model. 
""" +"""Testing suite for the PyTorch Mask2Former model.""" import unittest diff --git a/tests/models/maskformer/test_modeling_maskformer.py b/tests/models/maskformer/test_modeling_maskformer.py index 6ba48517c3..7b2bec17f4 100644 --- a/tests/models/maskformer/test_modeling_maskformer.py +++ b/tests/models/maskformer/test_modeling_maskformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch MaskFormer model. """ +"""Testing suite for the PyTorch MaskFormer model.""" import copy import unittest diff --git a/tests/models/maskformer/test_modeling_maskformer_swin.py b/tests/models/maskformer/test_modeling_maskformer_swin.py index 8d29e8ebee..01e9ed0e67 100644 --- a/tests/models/maskformer/test_modeling_maskformer_swin.py +++ b/tests/models/maskformer/test_modeling_maskformer_swin.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch MaskFormer Swin model. """ +"""Testing suite for the PyTorch MaskFormer Swin model.""" import collections import unittest diff --git a/tests/models/mbart/test_modeling_mbart.py b/tests/models/mbart/test_modeling_mbart.py index 93294d6568..1eaa4c4cdb 100644 --- a/tests/models/mbart/test_modeling_mbart.py +++ b/tests/models/mbart/test_modeling_mbart.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch MBART model. """ - +"""Testing suite for the PyTorch MBART model.""" import copy import tempfile diff --git a/tests/models/megatron_bert/test_modeling_megatron_bert.py b/tests/models/megatron_bert/test_modeling_megatron_bert.py index 818f65d80c..2ba1b92c21 100644 --- a/tests/models/megatron_bert/test_modeling_megatron_bert.py +++ b/tests/models/megatron_bert/test_modeling_megatron_bert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch MegatronBERT model. """ - +"""Testing suite for the PyTorch MegatronBERT model.""" import math import os diff --git a/tests/models/mgp_str/test_modeling_mgp_str.py b/tests/models/mgp_str/test_modeling_mgp_str.py index b2c3cb1400..ec1ac5a24a 100644 --- a/tests/models/mgp_str/test_modeling_mgp_str.py +++ b/tests/models/mgp_str/test_modeling_mgp_str.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch MGP-STR model. """ +"""Testing suite for the PyTorch MGP-STR model.""" import unittest diff --git a/tests/models/mgp_str/test_processor_mgp_str.py b/tests/models/mgp_str/test_processor_mgp_str.py index 3fd7d83534..6a028a2842 100644 --- a/tests/models/mgp_str/test_processor_mgp_str.py +++ b/tests/models/mgp_str/test_processor_mgp_str.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Testing suite for the MgpstrProcessor. """ +"""Testing suite for the MgpstrProcessor.""" import json import os diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py index b60062004a..02b203c7ae 100644 --- a/tests/models/mixtral/test_modeling_mixtral.py +++ b/tests/models/mixtral/test_modeling_mixtral.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Mixtral model. """ - +"""Testing suite for the PyTorch Mixtral model.""" import tempfile import unittest diff --git a/tests/models/mobilebert/test_tokenization_mobilebert.py b/tests/models/mobilebert/test_tokenization_mobilebert.py index 4d5e09e08d..0a01e2396a 100644 --- a/tests/models/mobilebert/test_tokenization_mobilebert.py +++ b/tests/models/mobilebert/test_tokenization_mobilebert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the MobileBERT tokenizer. """ - +"""Testing suite for the MobileBERT tokenizer.""" import os import unittest @@ -73,7 +72,7 @@ class MobileBERTTokenizationTest(TokenizerTesterMixin, unittest.TestCase): # Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.get_input_output_texts def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text @@ -81,7 +80,7 @@ class MobileBERTTokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_full_tokenizer(self): tokenizer = self.tokenizer_class(self.vocab_file) - tokens = tokenizer.tokenize("UNwant\u00E9d,running") + tokens = tokenizer.tokenize("UNwant\u00e9d,running") self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [9, 6, 7, 12, 10, 11]) @@ -93,7 +92,7 @@ class MobileBERTTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer = self.get_tokenizer() rust_tokenizer = self.get_rust_tokenizer() - sequence = "UNwant\u00E9d,running" + sequence = "UNwant\u00e9d,running" tokens = tokenizer.tokenize(sequence) rust_tokens = rust_tokenizer.tokenize(sequence) @@ -112,7 +111,7 @@ class MobileBERTTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer = self.get_tokenizer(do_lower_case=True) rust_tokenizer = self.get_rust_tokenizer(do_lower_case=True) - sequence = "UNwant\u00E9d,running" + sequence = "UNwant\u00e9d,running" tokens = tokenizer.tokenize(sequence) rust_tokens = rust_tokenizer.tokenize(sequence) @@ -131,7 +130,7 @@ class MobileBERTTokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_chinese(self): tokenizer = BasicTokenizer() - self.assertListEqual(tokenizer.tokenize("ah\u535A\u63A8zz"), ["ah", "\u535A", "\u63A8", "zz"]) + self.assertListEqual(tokenizer.tokenize("ah\u535a\u63a8zz"), ["ah", "\u535a", "\u63a8", "zz"]) # Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_basic_tokenizer_lower def test_basic_tokenizer_lower(self): @@ -140,7 +139,7 @@ class MobileBERTTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHeLLo!how \n Are yoU? 
"), ["hello", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) # Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_basic_tokenizer_lower_strip_accents_false def test_basic_tokenizer_lower_strip_accents_false(self): @@ -149,7 +148,7 @@ class MobileBERTTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hällo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["h\u00E9llo"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["h\u00e9llo"]) # Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_basic_tokenizer_lower_strip_accents_true def test_basic_tokenizer_lower_strip_accents_true(self): @@ -158,7 +157,7 @@ class MobileBERTTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) # Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_basic_tokenizer_lower_strip_accents_default def test_basic_tokenizer_lower_strip_accents_default(self): @@ -167,7 +166,7 @@ class MobileBERTTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) # Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_basic_tokenizer_no_lower def test_basic_tokenizer_no_lower(self): @@ -222,7 +221,7 @@ class MobileBERTTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertTrue(_is_whitespace("\t")) self.assertTrue(_is_whitespace("\r")) self.assertTrue(_is_whitespace("\n")) - self.assertTrue(_is_whitespace("\u00A0")) + self.assertTrue(_is_whitespace("\u00a0")) self.assertFalse(_is_whitespace("A")) self.assertFalse(_is_whitespace("-")) diff --git a/tests/models/mobilenet_v1/test_modeling_mobilenet_v1.py b/tests/models/mobilenet_v1/test_modeling_mobilenet_v1.py index 3be955a729..a1a8b8d9c1 100644 --- a/tests/models/mobilenet_v1/test_modeling_mobilenet_v1.py +++ b/tests/models/mobilenet_v1/test_modeling_mobilenet_v1.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch MobileNetV1 model. """ - +"""Testing suite for the PyTorch MobileNetV1 model.""" import unittest diff --git a/tests/models/mobilenet_v2/test_modeling_mobilenet_v2.py b/tests/models/mobilenet_v2/test_modeling_mobilenet_v2.py index f8a1ce8d26..eb3105e427 100644 --- a/tests/models/mobilenet_v2/test_modeling_mobilenet_v2.py +++ b/tests/models/mobilenet_v2/test_modeling_mobilenet_v2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch MobileNetV2 model. 
""" - +"""Testing suite for the PyTorch MobileNetV2 model.""" import unittest diff --git a/tests/models/mobilevit/test_modeling_mobilevit.py b/tests/models/mobilevit/test_modeling_mobilevit.py index 0fb94f38d6..381763d449 100644 --- a/tests/models/mobilevit/test_modeling_mobilevit.py +++ b/tests/models/mobilevit/test_modeling_mobilevit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch MobileViT model. """ - +"""Testing suite for the PyTorch MobileViT model.""" import unittest diff --git a/tests/models/mobilevit/test_modeling_tf_mobilevit.py b/tests/models/mobilevit/test_modeling_tf_mobilevit.py index 3132b93649..fcad3be021 100644 --- a/tests/models/mobilevit/test_modeling_tf_mobilevit.py +++ b/tests/models/mobilevit/test_modeling_tf_mobilevit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow MobileViT model. """ - +"""Testing suite for the TensorFlow MobileViT model.""" from __future__ import annotations diff --git a/tests/models/mobilevitv2/test_modeling_mobilevitv2.py b/tests/models/mobilevitv2/test_modeling_mobilevitv2.py index ff45a8c0b6..340bd22fbb 100644 --- a/tests/models/mobilevitv2/test_modeling_mobilevitv2.py +++ b/tests/models/mobilevitv2/test_modeling_mobilevitv2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch MobileViTV2 model. """ - +"""Testing suite for the PyTorch MobileViTV2 model.""" import unittest diff --git a/tests/models/mpnet/test_tokenization_mpnet.py b/tests/models/mpnet/test_tokenization_mpnet.py index b63dc7ab64..f1049f8ef5 100644 --- a/tests/models/mpnet/test_tokenization_mpnet.py +++ b/tests/models/mpnet/test_tokenization_mpnet.py @@ -57,14 +57,14 @@ class MPNetTokenizerTest(TokenizerTesterMixin, unittest.TestCase): vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text def test_full_tokenizer(self): tokenizer = self.tokenizer_class(self.vocab_file) - tokens = tokenizer.tokenize("UNwant\u00E9d,running") + tokens = tokenizer.tokenize("UNwant\u00e9d,running") self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [9, 6, 7, 12, 10, 11]) diff --git a/tests/models/mra/test_modeling_mra.py b/tests/models/mra/test_modeling_mra.py index a0bf0ec65e..1df002a124 100644 --- a/tests/models/mra/test_modeling_mra.py +++ b/tests/models/mra/test_modeling_mra.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch MRA model. 
""" - +"""Testing suite for the PyTorch MRA model.""" import unittest diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py index 8482072d73..e21ccb71f0 100644 --- a/tests/models/musicgen/test_modeling_musicgen.py +++ b/tests/models/musicgen/test_modeling_musicgen.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Musicgen model. """ +"""Testing suite for the PyTorch Musicgen model.""" + import copy import inspect import math diff --git a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py index b32b508258..7587b61abf 100644 --- a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py +++ b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Musicgen Melody model. """ +"""Testing suite for the PyTorch Musicgen Melody model.""" + import copy import inspect import math diff --git a/tests/models/mvp/test_modeling_mvp.py b/tests/models/mvp/test_modeling_mvp.py index 225ea4a786..5637d62fd0 100644 --- a/tests/models/mvp/test_modeling_mvp.py +++ b/tests/models/mvp/test_modeling_mvp.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch MVP model. """ - +"""Testing suite for the PyTorch MVP model.""" import copy import tempfile diff --git a/tests/models/nat/test_modeling_nat.py b/tests/models/nat/test_modeling_nat.py index 6a68311cc6..c04472620b 100644 --- a/tests/models/nat/test_modeling_nat.py +++ b/tests/models/nat/test_modeling_nat.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Nat model. """ +"""Testing suite for the PyTorch Nat model.""" import collections import unittest diff --git a/tests/models/nllb_moe/test_modeling_nllb_moe.py b/tests/models/nllb_moe/test_modeling_nllb_moe.py index 6997b896a5..714a1f17cc 100644 --- a/tests/models/nllb_moe/test_modeling_nllb_moe.py +++ b/tests/models/nllb_moe/test_modeling_nllb_moe.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch NLLB-MoE model. """ - +"""Testing suite for the PyTorch NLLB-MoE model.""" import copy import tempfile diff --git a/tests/models/nystromformer/test_modeling_nystromformer.py b/tests/models/nystromformer/test_modeling_nystromformer.py index f5bcb0ba5f..3d812ebf04 100644 --- a/tests/models/nystromformer/test_modeling_nystromformer.py +++ b/tests/models/nystromformer/test_modeling_nystromformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Testing suite for the PyTorch Nystromformer model. """ - +"""Testing suite for the PyTorch Nystromformer model.""" import unittest diff --git a/tests/models/olmo/test_modeling_olmo.py b/tests/models/olmo/test_modeling_olmo.py index 906bd73a70..ee87521c5b 100644 --- a/tests/models/olmo/test_modeling_olmo.py +++ b/tests/models/olmo/test_modeling_olmo.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch OLMo model. """ +"""Testing suite for the PyTorch OLMo model.""" import unittest diff --git a/tests/models/oneformer/test_modeling_oneformer.py b/tests/models/oneformer/test_modeling_oneformer.py index 538ab33cbf..9cdc475fae 100644 --- a/tests/models/oneformer/test_modeling_oneformer.py +++ b/tests/models/oneformer/test_modeling_oneformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch OneFormer model. """ +"""Testing suite for the PyTorch OneFormer model.""" import copy import inspect diff --git a/tests/models/opt/test_modeling_opt.py b/tests/models/opt/test_modeling_opt.py index 9845fcd469..e9843058f6 100644 --- a/tests/models/opt/test_modeling_opt.py +++ b/tests/models/opt/test_modeling_opt.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch OPT model. """ - +"""Testing suite for the PyTorch OPT model.""" import copy import tempfile diff --git a/tests/models/owlv2/test_modeling_owlv2.py b/tests/models/owlv2/test_modeling_owlv2.py index d29f8c08c0..eb85a66a1b 100644 --- a/tests/models/owlv2/test_modeling_owlv2.py +++ b/tests/models/owlv2/test_modeling_owlv2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Owlv2 model. """ - +"""Testing suite for the PyTorch Owlv2 model.""" import inspect import os diff --git a/tests/models/owlvit/test_modeling_owlvit.py b/tests/models/owlvit/test_modeling_owlvit.py index 370de65447..64fe046fb9 100644 --- a/tests/models/owlvit/test_modeling_owlvit.py +++ b/tests/models/owlvit/test_modeling_owlvit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch OwlViT model. """ - +"""Testing suite for the PyTorch OwlViT model.""" import inspect import os diff --git a/tests/models/paligemma/test_modeling_paligemma.py b/tests/models/paligemma/test_modeling_paligemma.py index 0653e1057c..ed8d36debe 100644 --- a/tests/models/paligemma/test_modeling_paligemma.py +++ b/tests/models/paligemma/test_modeling_paligemma.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch PaliGemma model. 
""" +"""Testing suite for the PyTorch PaliGemma model.""" import gc import unittest diff --git a/tests/models/patchtsmixer/test_modeling_patchtsmixer.py b/tests/models/patchtsmixer/test_modeling_patchtsmixer.py index 3551263d2c..64a9c1dbc5 100644 --- a/tests/models/patchtsmixer/test_modeling_patchtsmixer.py +++ b/tests/models/patchtsmixer/test_modeling_patchtsmixer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch PatchTSMixer model. """ +"""Testing suite for the PyTorch PatchTSMixer model.""" import inspect import itertools diff --git a/tests/models/patchtst/test_modeling_patchtst.py b/tests/models/patchtst/test_modeling_patchtst.py index 78d85590a7..381800e845 100644 --- a/tests/models/patchtst/test_modeling_patchtst.py +++ b/tests/models/patchtst/test_modeling_patchtst.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch PatchTST model. """ +"""Testing suite for the PyTorch PatchTST model.""" import inspect import random diff --git a/tests/models/pegasus/test_modeling_pegasus.py b/tests/models/pegasus/test_modeling_pegasus.py index dbc7c9de7b..85e37f2503 100644 --- a/tests/models/pegasus/test_modeling_pegasus.py +++ b/tests/models/pegasus/test_modeling_pegasus.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch PEGASUS model. """ +"""Testing suite for the PyTorch PEGASUS model.""" import tempfile import unittest diff --git a/tests/models/pegasus_x/test_modeling_pegasus_x.py b/tests/models/pegasus_x/test_modeling_pegasus_x.py index 106a8b39e8..0fdcd0c075 100644 --- a/tests/models/pegasus_x/test_modeling_pegasus_x.py +++ b/tests/models/pegasus_x/test_modeling_pegasus_x.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch PEGASUS-X model. """ - +"""Testing suite for the PyTorch PEGASUS-X model.""" import copy import math diff --git a/tests/models/perceiver/test_modeling_perceiver.py b/tests/models/perceiver/test_modeling_perceiver.py index 86bbc7b49d..530906388f 100644 --- a/tests/models/perceiver/test_modeling_perceiver.py +++ b/tests/models/perceiver/test_modeling_perceiver.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Perceiver model. 
""" +"""Testing suite for the PyTorch Perceiver model.""" import copy import inspect diff --git a/tests/models/perceiver/test_tokenization_perceiver.py b/tests/models/perceiver/test_tokenization_perceiver.py index ff2b6e68dc..306f594d10 100644 --- a/tests/models/perceiver/test_tokenization_perceiver.py +++ b/tests/models/perceiver/test_tokenization_perceiver.py @@ -164,7 +164,7 @@ class PerceiverTokenizationTest(TokenizerTesterMixin, unittest.TestCase): # Isolate this from the other tests because we save additional tokens/etc tmpdirname = tempfile.mkdtemp() - sample_text = " He is very happy, UNwant\u00E9d,running" + sample_text = " He is very happy, UNwant\u00e9d,running" before_tokens = tokenizer.encode(sample_text, add_special_tokens=False) tokenizer.save_pretrained(tmpdirname) @@ -180,7 +180,7 @@ class PerceiverTokenizationTest(TokenizerTesterMixin, unittest.TestCase): # Isolate this from the other tests because we save additional tokens/etc tmpdirname = tempfile.mkdtemp() - sample_text = " He is very happy, UNwant\u00E9d,running" + sample_text = " He is very happy, UNwant\u00e9d,running" tokenizer.add_tokens(["bim", "bambam"]) additional_special_tokens = tokenizer.additional_special_tokens additional_special_tokens.append("new_additional_special_token") diff --git a/tests/models/persimmon/test_modeling_persimmon.py b/tests/models/persimmon/test_modeling_persimmon.py index 46a650c55a..518cb7e037 100644 --- a/tests/models/persimmon/test_modeling_persimmon.py +++ b/tests/models/persimmon/test_modeling_persimmon.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Persimmon model. """ - +"""Testing suite for the PyTorch Persimmon model.""" import gc import unittest diff --git a/tests/models/phi/test_modeling_phi.py b/tests/models/phi/test_modeling_phi.py index e3c145bfa2..f395b70c1e 100644 --- a/tests/models/phi/test_modeling_phi.py +++ b/tests/models/phi/test_modeling_phi.py @@ -13,8 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Phi model. """ - +"""Testing suite for the PyTorch Phi model.""" import unittest diff --git a/tests/models/phi3/test_modeling_phi3.py b/tests/models/phi3/test_modeling_phi3.py index cc0c00d4e1..ad9c4c46aa 100644 --- a/tests/models/phi3/test_modeling_phi3.py +++ b/tests/models/phi3/test_modeling_phi3.py @@ -13,8 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Phi-3 model. """ - +"""Testing suite for the PyTorch Phi-3 model.""" import unittest diff --git a/tests/models/pix2struct/test_modeling_pix2struct.py b/tests/models/pix2struct/test_modeling_pix2struct.py index c96ea624b6..980fd078ea 100644 --- a/tests/models/pix2struct/test_modeling_pix2struct.py +++ b/tests/models/pix2struct/test_modeling_pix2struct.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Pix2Struct model. 
""" +"""Testing suite for the PyTorch Pix2Struct model.""" import copy import inspect diff --git a/tests/models/plbart/test_modeling_plbart.py b/tests/models/plbart/test_modeling_plbart.py index 998bd6c84f..cd21332ad4 100644 --- a/tests/models/plbart/test_modeling_plbart.py +++ b/tests/models/plbart/test_modeling_plbart.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch PLBART model. """ - +"""Testing suite for the PyTorch PLBART model.""" import copy import tempfile diff --git a/tests/models/poolformer/test_modeling_poolformer.py b/tests/models/poolformer/test_modeling_poolformer.py index ca5c3015a7..66216c6a7a 100644 --- a/tests/models/poolformer/test_modeling_poolformer.py +++ b/tests/models/poolformer/test_modeling_poolformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch PoolFormer model. """ - +"""Testing suite for the PyTorch PoolFormer model.""" import unittest diff --git a/tests/models/pop2piano/test_modeling_pop2piano.py b/tests/models/pop2piano/test_modeling_pop2piano.py index 594f79b083..3a33b5a981 100644 --- a/tests/models/pop2piano/test_modeling_pop2piano.py +++ b/tests/models/pop2piano/test_modeling_pop2piano.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Pop2Piano model. """ +"""Testing suite for the PyTorch Pop2Piano model.""" import copy import tempfile diff --git a/tests/models/prophetnet/test_tokenization_prophetnet.py b/tests/models/prophetnet/test_tokenization_prophetnet.py index 09390db48b..5eede4d384 100644 --- a/tests/models/prophetnet/test_tokenization_prophetnet.py +++ b/tests/models/prophetnet/test_tokenization_prophetnet.py @@ -61,21 +61,21 @@ class ProphetNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase): vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text def test_full_tokenizer(self): tokenizer = self.tokenizer_class(self.vocab_file) - tokens = tokenizer.tokenize("UNwant\u00E9d,running") + tokens = tokenizer.tokenize("UNwant\u00e9d,running") self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [9, 6, 7, 12, 10, 11]) def test_chinese(self): tokenizer = BasicTokenizer() - self.assertListEqual(tokenizer.tokenize("ah\u535A\u63A8zz"), ["ah", "\u535A", "\u63A8", "zz"]) + self.assertListEqual(tokenizer.tokenize("ah\u535a\u63a8zz"), ["ah", "\u535a", "\u63a8", "zz"]) def test_basic_tokenizer_lower(self): tokenizer = BasicTokenizer(do_lower_case=True) @@ -83,7 +83,7 @@ class ProphetNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHeLLo!how \n Are yoU? 
"), ["hello", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_lower_strip_accents_false(self): tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=False) @@ -91,7 +91,7 @@ class ProphetNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hällo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["h\u00E9llo"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["h\u00e9llo"]) def test_basic_tokenizer_lower_strip_accents_true(self): tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=True) @@ -99,7 +99,7 @@ class ProphetNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_lower_strip_accents_default(self): tokenizer = BasicTokenizer(do_lower_case=True) @@ -107,7 +107,7 @@ class ProphetNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_no_lower(self): tokenizer = BasicTokenizer(do_lower_case=False) @@ -170,7 +170,7 @@ class ProphetNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertTrue(_is_whitespace("\t")) self.assertTrue(_is_whitespace("\r")) self.assertTrue(_is_whitespace("\n")) - self.assertTrue(_is_whitespace("\u00A0")) + self.assertTrue(_is_whitespace("\u00a0")) self.assertFalse(_is_whitespace("A")) self.assertFalse(_is_whitespace("-")) diff --git a/tests/models/pvt/test_modeling_pvt.py b/tests/models/pvt/test_modeling_pvt.py index d18a336a4a..feda0adeec 100644 --- a/tests/models/pvt/test_modeling_pvt.py +++ b/tests/models/pvt/test_modeling_pvt.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Pvt model. """ - +"""Testing suite for the PyTorch Pvt model.""" import unittest diff --git a/tests/models/qdqbert/test_modeling_qdqbert.py b/tests/models/qdqbert/test_modeling_qdqbert.py index ed7f5a3239..96e63834ec 100644 --- a/tests/models/qdqbert/test_modeling_qdqbert.py +++ b/tests/models/qdqbert/test_modeling_qdqbert.py @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch QDQBERT model. """ - +"""Testing suite for the PyTorch QDQBERT model.""" import unittest diff --git a/tests/models/qwen2/test_modeling_qwen2.py b/tests/models/qwen2/test_modeling_qwen2.py index 54718c4303..f2e66953a7 100644 --- a/tests/models/qwen2/test_modeling_qwen2.py +++ b/tests/models/qwen2/test_modeling_qwen2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Qwen2 model. """ - +"""Testing suite for the PyTorch Qwen2 model.""" import gc import tempfile diff --git a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py index 48c6ccf78a..13af5b802b 100644 --- a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py +++ b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Qwen2MoE model. """ - +"""Testing suite for the PyTorch Qwen2MoE model.""" import gc import tempfile diff --git a/tests/models/realm/test_modeling_realm.py b/tests/models/realm/test_modeling_realm.py index 4d6d9fd0ff..07a3b9d4b3 100644 --- a/tests/models/realm/test_modeling_realm.py +++ b/tests/models/realm/test_modeling_realm.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch REALM model. """ +"""Testing suite for the PyTorch REALM model.""" import copy import unittest diff --git a/tests/models/realm/test_tokenization_realm.py b/tests/models/realm/test_tokenization_realm.py index f963fb347f..85c478837e 100644 --- a/tests/models/realm/test_tokenization_realm.py +++ b/tests/models/realm/test_tokenization_realm.py @@ -65,14 +65,14 @@ class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase): vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text def test_full_tokenizer(self): tokenizer = self.tokenizer_class(self.vocab_file) - tokens = tokenizer.tokenize("UNwant\u00E9d,running") + tokens = tokenizer.tokenize("UNwant\u00e9d,running") self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [9, 6, 7, 12, 10, 11]) @@ -83,7 +83,7 @@ class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer = self.get_tokenizer() rust_tokenizer = self.get_rust_tokenizer() - sequence = "UNwant\u00E9d,running" + sequence = "UNwant\u00e9d,running" tokens = tokenizer.tokenize(sequence) rust_tokens = rust_tokenizer.tokenize(sequence) @@ -102,7 +102,7 @@ class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer = self.get_tokenizer(do_lower_case=True) rust_tokenizer = self.get_rust_tokenizer(do_lower_case=True) - sequence = "UNwant\u00E9d,running" + sequence = "UNwant\u00e9d,running" tokens = tokenizer.tokenize(sequence) rust_tokens = rust_tokenizer.tokenize(sequence) @@ -120,7 +120,7 @@ class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_chinese(self): tokenizer = BasicTokenizer() - self.assertListEqual(tokenizer.tokenize("ah\u535A\u63A8zz"), ["ah", "\u535A", "\u63A8", "zz"]) + self.assertListEqual(tokenizer.tokenize("ah\u535a\u63a8zz"), ["ah", "\u535a", "\u63a8", "zz"]) def test_basic_tokenizer_lower(self): tokenizer = BasicTokenizer(do_lower_case=True) @@ -128,7 +128,7 @@ class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( 
tokenizer.tokenize(" \tHeLLo!how \n Are yoU? "), ["hello", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_lower_strip_accents_false(self): tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=False) @@ -136,7 +136,7 @@ class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hällo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["h\u00E9llo"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["h\u00e9llo"]) def test_basic_tokenizer_lower_strip_accents_true(self): tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=True) @@ -144,7 +144,7 @@ class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_lower_strip_accents_default(self): tokenizer = BasicTokenizer(do_lower_case=True) @@ -152,7 +152,7 @@ class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_no_lower(self): tokenizer = BasicTokenizer(do_lower_case=False) @@ -201,7 +201,7 @@ class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertTrue(_is_whitespace("\t")) self.assertTrue(_is_whitespace("\r")) self.assertTrue(_is_whitespace("\n")) - self.assertTrue(_is_whitespace("\u00A0")) + self.assertTrue(_is_whitespace("\u00a0")) self.assertFalse(_is_whitespace("A")) self.assertFalse(_is_whitespace("-")) diff --git a/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py b/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py index 599161e8d4..b0475b244f 100644 --- a/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py +++ b/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch RecurrentGemma model. """ +"""Testing suite for the PyTorch RecurrentGemma model.""" + import unittest from transformers import AutoModelForCausalLM, AutoTokenizer, RecurrentGemmaConfig, is_torch_available, set_seed diff --git a/tests/models/regnet/test_modeling_regnet.py b/tests/models/regnet/test_modeling_regnet.py index 8840a141fa..78935c4e57 100644 --- a/tests/models/regnet/test_modeling_regnet.py +++ b/tests/models/regnet/test_modeling_regnet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch RegNet model. 
""" - +"""Testing suite for the PyTorch RegNet model.""" import unittest diff --git a/tests/models/regnet/test_modeling_tf_regnet.py b/tests/models/regnet/test_modeling_tf_regnet.py index 70adc9c875..b2998ae070 100644 --- a/tests/models/regnet/test_modeling_tf_regnet.py +++ b/tests/models/regnet/test_modeling_tf_regnet.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow RegNet model. """ +"""Testing suite for the TensorFlow RegNet model.""" from __future__ import annotations diff --git a/tests/models/rembert/test_modeling_rembert.py b/tests/models/rembert/test_modeling_rembert.py index fe21ae2ecf..664888fcc0 100644 --- a/tests/models/rembert/test_modeling_rembert.py +++ b/tests/models/rembert/test_modeling_rembert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch RemBERT model. """ - +"""Testing suite for the PyTorch RemBERT model.""" import unittest diff --git a/tests/models/rembert/test_tokenization_rembert.py b/tests/models/rembert/test_tokenization_rembert.py index 096106a2fc..46794733f3 100644 --- a/tests/models/rembert/test_tokenization_rembert.py +++ b/tests/models/rembert/test_tokenization_rembert.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the RemBert tokenizer. """ - +"""Testing suite for the RemBert tokenizer.""" import tempfile import unittest diff --git a/tests/models/resnet/test_modeling_resnet.py b/tests/models/resnet/test_modeling_resnet.py index 0fd32bc72f..5595ebde69 100644 --- a/tests/models/resnet/test_modeling_resnet.py +++ b/tests/models/resnet/test_modeling_resnet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ResNet model. """ - +"""Testing suite for the PyTorch ResNet model.""" import unittest diff --git a/tests/models/resnet/test_modeling_tf_resnet.py b/tests/models/resnet/test_modeling_tf_resnet.py index a8e2ce93ee..f5762c7225 100644 --- a/tests/models/resnet/test_modeling_tf_resnet.py +++ b/tests/models/resnet/test_modeling_tf_resnet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the Tensorflow ResNet model. """ - +"""Testing suite for the Tensorflow ResNet model.""" from __future__ import annotations diff --git a/tests/models/roc_bert/test_modeling_roc_bert.py b/tests/models/roc_bert/test_modeling_roc_bert.py index d52304ade9..55dc8453f4 100644 --- a/tests/models/roc_bert/test_modeling_roc_bert.py +++ b/tests/models/roc_bert/test_modeling_roc_bert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch RoCBert model. 
""" +"""Testing suite for the PyTorch RoCBert model.""" import unittest diff --git a/tests/models/roc_bert/test_tokenization_roc_bert.py b/tests/models/roc_bert/test_tokenization_roc_bert.py index 4fb2b172d6..fdd95a033a 100644 --- a/tests/models/roc_bert/test_tokenization_roc_bert.py +++ b/tests/models/roc_bert/test_tokenization_roc_bert.py @@ -73,7 +73,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_chinese(self): tokenizer = RoCBertBasicTokenizer() - self.assertListEqual(tokenizer.tokenize("ah\u535A\u63A8zz"), ["ah", "\u535A", "\u63A8", "zz"]) + self.assertListEqual(tokenizer.tokenize("ah\u535a\u63a8zz"), ["ah", "\u535a", "\u63a8", "zz"]) # Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_basic_tokenizer_lower with BasicTokenizer->RoCBertBasicTokenizer def test_basic_tokenizer_lower(self): @@ -82,7 +82,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHeLLo!how \n Are yoU? "), ["hello", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) # Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_basic_tokenizer_lower_strip_accents_false with BasicTokenizer->RoCBertBasicTokenizer def test_basic_tokenizer_lower_strip_accents_false(self): @@ -91,7 +91,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hällo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["h\u00E9llo"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["h\u00e9llo"]) # Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_basic_tokenizer_lower_strip_accents_true with BasicTokenizer->RoCBertBasicTokenizer def test_basic_tokenizer_lower_strip_accents_true(self): @@ -100,7 +100,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) # Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_basic_tokenizer_lower_strip_accents_default with BasicTokenizer->RoCBertBasicTokenizer def test_basic_tokenizer_lower_strip_accents_default(self): @@ -109,7 +109,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? 
"), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) # Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_basic_tokenizer_no_lower with BasicTokenizer->RoCBertBasicTokenizer def test_basic_tokenizer_no_lower(self): @@ -164,7 +164,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertTrue(_is_whitespace("\t")) self.assertTrue(_is_whitespace("\r")) self.assertTrue(_is_whitespace("\n")) - self.assertTrue(_is_whitespace("\u00A0")) + self.assertTrue(_is_whitespace("\u00a0")) self.assertFalse(_is_whitespace("A")) self.assertFalse(_is_whitespace("-")) diff --git a/tests/models/roformer/test_modeling_roformer.py b/tests/models/roformer/test_modeling_roformer.py index 64ce38c515..1c22243b37 100644 --- a/tests/models/roformer/test_modeling_roformer.py +++ b/tests/models/roformer/test_modeling_roformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch RoFormer model. """ - +"""Testing suite for the PyTorch RoFormer model.""" import unittest diff --git a/tests/models/sam/test_modeling_sam.py b/tests/models/sam/test_modeling_sam.py index ee9eeefd33..1590d4f806 100644 --- a/tests/models/sam/test_modeling_sam.py +++ b/tests/models/sam/test_modeling_sam.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch SAM model. """ - +"""Testing suite for the PyTorch SAM model.""" import gc import unittest diff --git a/tests/models/sam/test_modeling_tf_sam.py b/tests/models/sam/test_modeling_tf_sam.py index d742a9b085..81fe6f1eaf 100644 --- a/tests/models/sam/test_modeling_tf_sam.py +++ b/tests/models/sam/test_modeling_tf_sam.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow SAM model. """ - +"""Testing suite for the TensorFlow SAM model.""" from __future__ import annotations diff --git a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py index 925d734293..9445bf46fc 100644 --- a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py +++ b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch SeamlessM4T model. """ - +"""Testing suite for the PyTorch SeamlessM4T model.""" import copy import tempfile diff --git a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py index b36e29d792..f450dca519 100644 --- a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py +++ b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Testing suite for the PyTorch SeamlessM4Tv2 model. """ - +"""Testing suite for the PyTorch SeamlessM4Tv2 model.""" import copy import tempfile diff --git a/tests/models/segformer/test_modeling_segformer.py b/tests/models/segformer/test_modeling_segformer.py index fb38338513..f0f125933b 100644 --- a/tests/models/segformer/test_modeling_segformer.py +++ b/tests/models/segformer/test_modeling_segformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch SegFormer model. """ - +"""Testing suite for the PyTorch SegFormer model.""" import unittest diff --git a/tests/models/segformer/test_modeling_tf_segformer.py b/tests/models/segformer/test_modeling_tf_segformer.py index 16b5740a08..89a28925c2 100644 --- a/tests/models/segformer/test_modeling_tf_segformer.py +++ b/tests/models/segformer/test_modeling_tf_segformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow SegFormer model. """ +"""Testing suite for the TensorFlow SegFormer model.""" from __future__ import annotations diff --git a/tests/models/seggpt/test_modeling_seggpt.py b/tests/models/seggpt/test_modeling_seggpt.py index efa0231c1e..46c370db77 100644 --- a/tests/models/seggpt/test_modeling_seggpt.py +++ b/tests/models/seggpt/test_modeling_seggpt.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch SegGpt model. """ - +"""Testing suite for the PyTorch SegGpt model.""" import inspect import math diff --git a/tests/models/sew/test_modeling_sew.py b/tests/models/sew/test_modeling_sew.py index 5342df9e08..2394ed015d 100644 --- a/tests/models/sew/test_modeling_sew.py +++ b/tests/models/sew/test_modeling_sew.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Hubert model. """ - +"""Testing suite for the PyTorch Hubert model.""" import math import unittest diff --git a/tests/models/sew_d/test_modeling_sew_d.py b/tests/models/sew_d/test_modeling_sew_d.py index 1980bd3ab1..a9af497226 100644 --- a/tests/models/sew_d/test_modeling_sew_d.py +++ b/tests/models/sew_d/test_modeling_sew_d.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Hubert model. """ - +"""Testing suite for the PyTorch Hubert model.""" import math import unittest diff --git a/tests/models/siglip/test_modeling_siglip.py b/tests/models/siglip/test_modeling_siglip.py index e0a4825d0e..d65416a3fa 100644 --- a/tests/models/siglip/test_modeling_siglip.py +++ b/tests/models/siglip/test_modeling_siglip.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch SigLIP model. 
""" - +"""Testing suite for the PyTorch SigLIP model.""" import inspect import os diff --git a/tests/models/speech_to_text/test_modeling_speech_to_text.py b/tests/models/speech_to_text/test_modeling_speech_to_text.py index 5d0e8f3a07..183120b820 100644 --- a/tests/models/speech_to_text/test_modeling_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_speech_to_text.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Speech2Text model. """ +"""Testing suite for the PyTorch Speech2Text model.""" import copy import inspect diff --git a/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py b/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py index badde2485d..c2fd215f38 100644 --- a/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow Speech2Text model. """ +"""Testing suite for the TensorFlow Speech2Text model.""" from __future__ import annotations diff --git a/tests/models/speech_to_text_2/test_modeling_speech_to_text_2.py b/tests/models/speech_to_text_2/test_modeling_speech_to_text_2.py index cbb449c6e7..fffa16aa30 100644 --- a/tests/models/speech_to_text_2/test_modeling_speech_to_text_2.py +++ b/tests/models/speech_to_text_2/test_modeling_speech_to_text_2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Speech2Text model. """ +"""Testing suite for the PyTorch Speech2Text model.""" import unittest diff --git a/tests/models/speecht5/test_modeling_speecht5.py b/tests/models/speecht5/test_modeling_speecht5.py index 622ae196bd..6badcfa060 100644 --- a/tests/models/speecht5/test_modeling_speecht5.py +++ b/tests/models/speecht5/test_modeling_speecht5.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch SpeechT5 model. """ +"""Testing suite for the PyTorch SpeechT5 model.""" import copy import inspect diff --git a/tests/models/splinter/test_modeling_splinter.py b/tests/models/splinter/test_modeling_splinter.py index b6a2588c87..b62571f189 100644 --- a/tests/models/splinter/test_modeling_splinter.py +++ b/tests/models/splinter/test_modeling_splinter.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Splinter model. """ +"""Testing suite for the PyTorch Splinter model.""" import copy import unittest diff --git a/tests/models/stablelm/test_modeling_stablelm.py b/tests/models/stablelm/test_modeling_stablelm.py index 083f928612..b0d7261de6 100644 --- a/tests/models/stablelm/test_modeling_stablelm.py +++ b/tests/models/stablelm/test_modeling_stablelm.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch StableLm model. """ - +"""Testing suite for the PyTorch StableLm model.""" import unittest diff --git a/tests/models/starcoder2/test_modeling_starcoder2.py b/tests/models/starcoder2/test_modeling_starcoder2.py index faba4d254b..7b05e5c165 100644 --- a/tests/models/starcoder2/test_modeling_starcoder2.py +++ b/tests/models/starcoder2/test_modeling_starcoder2.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Starcoder2 model. """ - +"""Testing suite for the PyTorch Starcoder2 model.""" import tempfile import unittest diff --git a/tests/models/swiftformer/test_modeling_swiftformer.py b/tests/models/swiftformer/test_modeling_swiftformer.py index c54e092809..ea90023b19 100644 --- a/tests/models/swiftformer/test_modeling_swiftformer.py +++ b/tests/models/swiftformer/test_modeling_swiftformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch SwiftFormer model. """ - +"""Testing suite for the PyTorch SwiftFormer model.""" import copy import unittest diff --git a/tests/models/swiftformer/test_modeling_tf_swiftformer.py b/tests/models/swiftformer/test_modeling_tf_swiftformer.py index e73d38605d..b05e390597 100644 --- a/tests/models/swiftformer/test_modeling_tf_swiftformer.py +++ b/tests/models/swiftformer/test_modeling_tf_swiftformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow SwiftFormer model. """ - +"""Testing suite for the TensorFlow SwiftFormer model.""" import inspect import unittest diff --git a/tests/models/swin/test_modeling_swin.py b/tests/models/swin/test_modeling_swin.py index 699171722d..99917adafc 100644 --- a/tests/models/swin/test_modeling_swin.py +++ b/tests/models/swin/test_modeling_swin.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Swin model. """ +"""Testing suite for the PyTorch Swin model.""" import collections import unittest diff --git a/tests/models/swin/test_modeling_tf_swin.py b/tests/models/swin/test_modeling_tf_swin.py index f05ef7a434..6a443571d3 100644 --- a/tests/models/swin/test_modeling_tf_swin.py +++ b/tests/models/swin/test_modeling_tf_swin.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TF 2.0 Swin model. 
""" - +"""Testing suite for the TF 2.0 Swin model.""" from __future__ import annotations diff --git a/tests/models/swin2sr/test_modeling_swin2sr.py b/tests/models/swin2sr/test_modeling_swin2sr.py index 44ca7b9249..2c09be2760 100644 --- a/tests/models/swin2sr/test_modeling_swin2sr.py +++ b/tests/models/swin2sr/test_modeling_swin2sr.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Swin2SR model. """ +"""Testing suite for the PyTorch Swin2SR model.""" + import unittest from transformers import Swin2SRConfig diff --git a/tests/models/swinv2/test_modeling_swinv2.py b/tests/models/swinv2/test_modeling_swinv2.py index 7a948d1282..c0ab1081f4 100644 --- a/tests/models/swinv2/test_modeling_swinv2.py +++ b/tests/models/swinv2/test_modeling_swinv2.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Swinv2 model. """ +"""Testing suite for the PyTorch Swinv2 model.""" + import collections import inspect import unittest diff --git a/tests/models/table_transformer/test_modeling_table_transformer.py b/tests/models/table_transformer/test_modeling_table_transformer.py index 989517eb8c..e41b53a21f 100644 --- a/tests/models/table_transformer/test_modeling_table_transformer.py +++ b/tests/models/table_transformer/test_modeling_table_transformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Table Transformer model. 
""" - +"""Testing suite for the PyTorch Table Transformer model.""" import inspect import math diff --git a/tests/models/tapas/test_tokenization_tapas.py b/tests/models/tapas/test_tokenization_tapas.py index 8f2bf9bb69..b64eec06d0 100644 --- a/tests/models/tapas/test_tokenization_tapas.py +++ b/tests/models/tapas/test_tokenization_tapas.py @@ -143,7 +143,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text @@ -189,7 +189,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer = self.get_tokenizer() rust_tokenizer = self.get_rust_tokenizer() - sequence = "UNwant\u00E9d,running" + sequence = "UNwant\u00e9d,running" tokens = tokenizer.tokenize(sequence) rust_tokens = rust_tokenizer.tokenize(sequence) @@ -208,7 +208,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer = self.get_tokenizer(do_lower_case=True) rust_tokenizer = self.get_rust_tokenizer(do_lower_case=True) - sequence = "UNwant\u00E9d,running" + sequence = "UNwant\u00e9d,running" tokens = tokenizer.tokenize(sequence) rust_tokens = rust_tokenizer.tokenize(sequence) @@ -230,7 +230,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_chinese(self): tokenizer = BasicTokenizer() - self.assertListEqual(tokenizer.tokenize("ah\u535A\u63A8zz"), ["ah", "\u535A", "\u63A8", "zz"]) + self.assertListEqual(tokenizer.tokenize("ah\u535a\u63a8zz"), ["ah", "\u535a", "\u63a8", "zz"]) def test_basic_tokenizer_lower(self): tokenizer = BasicTokenizer(do_lower_case=True) @@ -238,7 +238,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHeLLo!how \n Are yoU? "), ["hello", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_lower_strip_accents_false(self): tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=False) @@ -246,7 +246,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hällo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["h\u00E9llo"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["h\u00e9llo"]) def test_basic_tokenizer_lower_strip_accents_true(self): tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=True) @@ -254,7 +254,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_lower_strip_accents_default(self): tokenizer = BasicTokenizer(do_lower_case=True) @@ -262,7 +262,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual( tokenizer.tokenize(" \tHäLLo!how \n Are yoU? 
"), ["hallo", "!", "how", "are", "you", "?"] ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) def test_basic_tokenizer_no_lower(self): tokenizer = BasicTokenizer(do_lower_case=False) @@ -311,7 +311,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertTrue(_is_whitespace("\t")) self.assertTrue(_is_whitespace("\r")) self.assertTrue(_is_whitespace("\n")) - self.assertTrue(_is_whitespace("\u00A0")) + self.assertTrue(_is_whitespace("\u00a0")) self.assertFalse(_is_whitespace("A")) self.assertFalse(_is_whitespace("-")) @@ -935,7 +935,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): table = self.get_table(tokenizer, length=0) tmpdirname = tempfile.mkdtemp() - sample_text = " He is very happy, UNwant\u00E9d,running" + sample_text = " He is very happy, UNwant\u00e9d,running" before_tokens = tokenizer.encode(table, sample_text, add_special_tokens=False) before_vocab = tokenizer.get_vocab() tokenizer.save_pretrained(tmpdirname) diff --git a/tests/models/time_series_transformer/test_modeling_time_series_transformer.py b/tests/models/time_series_transformer/test_modeling_time_series_transformer.py index 330cf95d06..a2694fd0b3 100644 --- a/tests/models/time_series_transformer/test_modeling_time_series_transformer.py +++ b/tests/models/time_series_transformer/test_modeling_time_series_transformer.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch TimeSeriesTransformer model. """ +"""Testing suite for the PyTorch TimeSeriesTransformer model.""" import inspect import tempfile diff --git a/tests/models/timesformer/test_modeling_timesformer.py b/tests/models/timesformer/test_modeling_timesformer.py index 3d97d2c0f6..e3c6d7fad3 100644 --- a/tests/models/timesformer/test_modeling_timesformer.py +++ b/tests/models/timesformer/test_modeling_timesformer.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch TimeSformer model. """ - +"""Testing suite for the PyTorch TimeSformer model.""" import copy import unittest diff --git a/tests/models/trocr/test_modeling_trocr.py b/tests/models/trocr/test_modeling_trocr.py index da24c7dd43..854ca22567 100644 --- a/tests/models/trocr/test_modeling_trocr.py +++ b/tests/models/trocr/test_modeling_trocr.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch TrOCR model. """ +"""Testing suite for the PyTorch TrOCR model.""" import unittest diff --git a/tests/models/tvlt/test_feature_extraction_tvlt.py b/tests/models/tvlt/test_feature_extraction_tvlt.py index cd737d5a8f..a0b3f7a916 100644 --- a/tests/models/tvlt/test_feature_extraction_tvlt.py +++ b/tests/models/tvlt/test_feature_extraction_tvlt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TVLT feature extraction. 
""" +"""Testing suite for the TVLT feature extraction.""" import itertools import random diff --git a/tests/models/tvlt/test_image_processor_tvlt.py b/tests/models/tvlt/test_image_processor_tvlt.py index 8677a86665..c2974da6d8 100644 --- a/tests/models/tvlt/test_image_processor_tvlt.py +++ b/tests/models/tvlt/test_image_processor_tvlt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TVLT image processor. """ +"""Testing suite for the TVLT image processor.""" import unittest diff --git a/tests/models/tvlt/test_modeling_tvlt.py b/tests/models/tvlt/test_modeling_tvlt.py index ce27946ee7..574559a7a2 100644 --- a/tests/models/tvlt/test_modeling_tvlt.py +++ b/tests/models/tvlt/test_modeling_tvlt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch TVLT model. """ +"""Testing suite for the PyTorch TVLT model.""" import copy import inspect diff --git a/tests/models/tvp/test_modeling_tvp.py b/tests/models/tvp/test_modeling_tvp.py index c7bcc148a1..90050c3bdf 100644 --- a/tests/models/tvp/test_modeling_tvp.py +++ b/tests/models/tvp/test_modeling_tvp.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch TVP model. """ - +"""Testing suite for the PyTorch TVP model.""" import unittest diff --git a/tests/models/udop/test_tokenization_udop.py b/tests/models/udop/test_tokenization_udop.py index d022128ed1..8bea9880b0 100644 --- a/tests/models/udop/test_tokenization_udop.py +++ b/tests/models/udop/test_tokenization_udop.py @@ -102,7 +102,7 @@ class UdopTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer.save_pretrained(self.tmpdirname) def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00E9d,running" + input_text = "UNwant\u00e9d,running" output_text = "unwanted, running" return input_text, output_text diff --git a/tests/models/unispeech/test_modeling_unispeech.py b/tests/models/unispeech/test_modeling_unispeech.py index a286274828..7735fe4ffa 100644 --- a/tests/models/unispeech/test_modeling_unispeech.py +++ b/tests/models/unispeech/test_modeling_unispeech.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch UniSpeech model. """ +"""Testing suite for the PyTorch UniSpeech model.""" import math import unittest diff --git a/tests/models/unispeech_sat/test_modeling_unispeech_sat.py b/tests/models/unispeech_sat/test_modeling_unispeech_sat.py index 3e71449357..a48ed36888 100644 --- a/tests/models/unispeech_sat/test_modeling_unispeech_sat.py +++ b/tests/models/unispeech_sat/test_modeling_unispeech_sat.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch UniSpeechSat model. 
""" +"""Testing suite for the PyTorch UniSpeechSat model.""" import math import unittest diff --git a/tests/models/upernet/test_modeling_upernet.py b/tests/models/upernet/test_modeling_upernet.py index 234cd8af09..79fda279fa 100644 --- a/tests/models/upernet/test_modeling_upernet.py +++ b/tests/models/upernet/test_modeling_upernet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch UperNet framework. """ - +"""Testing suite for the PyTorch UperNet framework.""" import unittest diff --git a/tests/models/video_llava/test_modeling_video_llava.py b/tests/models/video_llava/test_modeling_video_llava.py index 52a75e8a05..1a91a2660f 100644 --- a/tests/models/video_llava/test_modeling_video_llava.py +++ b/tests/models/video_llava/test_modeling_video_llava.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch VideoLlava model. """ +"""Testing suite for the PyTorch VideoLlava model.""" import gc import unittest diff --git a/tests/models/videomae/test_modeling_videomae.py b/tests/models/videomae/test_modeling_videomae.py index 425fe2bcd7..cef2ac33ae 100644 --- a/tests/models/videomae/test_modeling_videomae.py +++ b/tests/models/videomae/test_modeling_videomae.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch VideoMAE model. """ - +"""Testing suite for the PyTorch VideoMAE model.""" import copy import unittest diff --git a/tests/models/vilt/test_modeling_vilt.py b/tests/models/vilt/test_modeling_vilt.py index 3e25fc3bba..194b4dd7ba 100644 --- a/tests/models/vilt/test_modeling_vilt.py +++ b/tests/models/vilt/test_modeling_vilt.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ViLT model. """ +"""Testing suite for the PyTorch ViLT model.""" import unittest diff --git a/tests/models/vipllava/test_modeling_vipllava.py b/tests/models/vipllava/test_modeling_vipllava.py index ff84f71784..af278643fd 100644 --- a/tests/models/vipllava/test_modeling_vipllava.py +++ b/tests/models/vipllava/test_modeling_vipllava.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch VipLlava model. """ +"""Testing suite for the PyTorch VipLlava model.""" import gc import unittest diff --git a/tests/models/vision_encoder_decoder/test_modeling_tf_vision_encoder_decoder.py b/tests/models/vision_encoder_decoder/test_modeling_tf_vision_encoder_decoder.py index 171f33d680..f2bd9c1243 100644 --- a/tests/models/vision_encoder_decoder/test_modeling_tf_vision_encoder_decoder.py +++ b/tests/models/vision_encoder_decoder/test_modeling_tf_vision_encoder_decoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow VisionEncoderDecoder model. """ - +"""Testing suite for the TensorFlow VisionEncoderDecoder model.""" from __future__ import annotations diff --git a/tests/models/vision_text_dual_encoder/test_modeling_flax_vision_text_dual_encoder.py b/tests/models/vision_text_dual_encoder/test_modeling_flax_vision_text_dual_encoder.py index ddf8b4335f..e57a4bd4db 100644 --- a/tests/models/vision_text_dual_encoder/test_modeling_flax_vision_text_dual_encoder.py +++ b/tests/models/vision_text_dual_encoder/test_modeling_flax_vision_text_dual_encoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch VisionTextDualEncoder model. """ - +"""Testing suite for the PyTorch VisionTextDualEncoder model.""" import collections import tempfile diff --git a/tests/models/vision_text_dual_encoder/test_modeling_tf_vision_text_dual_encoder.py b/tests/models/vision_text_dual_encoder/test_modeling_tf_vision_text_dual_encoder.py index 10baef0059..eeb8951edb 100644 --- a/tests/models/vision_text_dual_encoder/test_modeling_tf_vision_text_dual_encoder.py +++ b/tests/models/vision_text_dual_encoder/test_modeling_tf_vision_text_dual_encoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch VisionTextDualEncoder model. """ - +"""Testing suite for the PyTorch VisionTextDualEncoder model.""" from __future__ import annotations diff --git a/tests/models/vision_text_dual_encoder/test_modeling_vision_text_dual_encoder.py b/tests/models/vision_text_dual_encoder/test_modeling_vision_text_dual_encoder.py index 4a1ee2462e..88e06178e5 100644 --- a/tests/models/vision_text_dual_encoder/test_modeling_vision_text_dual_encoder.py +++ b/tests/models/vision_text_dual_encoder/test_modeling_vision_text_dual_encoder.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch VisionTextDualEncoder model. """ - +"""Testing suite for the PyTorch VisionTextDualEncoder model.""" import collections import tempfile diff --git a/tests/models/visual_bert/test_modeling_visual_bert.py b/tests/models/visual_bert/test_modeling_visual_bert.py index 249ccdd84b..d24ea14b65 100644 --- a/tests/models/visual_bert/test_modeling_visual_bert.py +++ b/tests/models/visual_bert/test_modeling_visual_bert.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch VisualBERT model. """ +"""Testing suite for the PyTorch VisualBERT model.""" import copy import unittest diff --git a/tests/models/vit/test_modeling_tf_vit.py b/tests/models/vit/test_modeling_tf_vit.py index 2c06b0bc60..a13119436f 100644 --- a/tests/models/vit/test_modeling_tf_vit.py +++ b/tests/models/vit/test_modeling_tf_vit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow ViT model. """ - +"""Testing suite for the TensorFlow ViT model.""" from __future__ import annotations diff --git a/tests/models/vit/test_modeling_vit.py b/tests/models/vit/test_modeling_vit.py index e150bd0be9..2a7886c5a6 100644 --- a/tests/models/vit/test_modeling_vit.py +++ b/tests/models/vit/test_modeling_vit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ViT model. """ - +"""Testing suite for the PyTorch ViT model.""" import unittest diff --git a/tests/models/vit_hybrid/test_modeling_vit_hybrid.py b/tests/models/vit_hybrid/test_modeling_vit_hybrid.py index b3d0040c22..043dcb4de4 100644 --- a/tests/models/vit_hybrid/test_modeling_vit_hybrid.py +++ b/tests/models/vit_hybrid/test_modeling_vit_hybrid.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ViT Hybrid model. """ - +"""Testing suite for the PyTorch ViT Hybrid model.""" import unittest diff --git a/tests/models/vit_mae/test_modeling_tf_vit_mae.py b/tests/models/vit_mae/test_modeling_tf_vit_mae.py index 4221d6bfd3..5c27e5ac80 100644 --- a/tests/models/vit_mae/test_modeling_tf_vit_mae.py +++ b/tests/models/vit_mae/test_modeling_tf_vit_mae.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow ViTMAE model. """ - +"""Testing suite for the TensorFlow ViTMAE model.""" from __future__ import annotations diff --git a/tests/models/vit_mae/test_modeling_vit_mae.py b/tests/models/vit_mae/test_modeling_vit_mae.py index 6c981adeb8..0357a3ebda 100644 --- a/tests/models/vit_mae/test_modeling_vit_mae.py +++ b/tests/models/vit_mae/test_modeling_vit_mae.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ViTMAE model. """ - +"""Testing suite for the PyTorch ViTMAE model.""" import math import tempfile diff --git a/tests/models/vit_msn/test_modeling_vit_msn.py b/tests/models/vit_msn/test_modeling_vit_msn.py index be0857181e..f911749c82 100644 --- a/tests/models/vit_msn/test_modeling_vit_msn.py +++ b/tests/models/vit_msn/test_modeling_vit_msn.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ViTMSN model. """ - +"""Testing suite for the PyTorch ViTMSN model.""" import unittest diff --git a/tests/models/vitdet/test_modeling_vitdet.py b/tests/models/vitdet/test_modeling_vitdet.py index 2df1b79257..49076afb4d 100644 --- a/tests/models/vitdet/test_modeling_vitdet.py +++ b/tests/models/vitdet/test_modeling_vitdet.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" Testing suite for the PyTorch ViTDet model. """ - +"""Testing suite for the PyTorch ViTDet model.""" import unittest diff --git a/tests/models/vitmatte/test_modeling_vitmatte.py b/tests/models/vitmatte/test_modeling_vitmatte.py index 4a8e85160b..ccdefe957c 100644 --- a/tests/models/vitmatte/test_modeling_vitmatte.py +++ b/tests/models/vitmatte/test_modeling_vitmatte.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch VitMatte model. """ - +"""Testing suite for the PyTorch VitMatte model.""" import unittest diff --git a/tests/models/vits/test_modeling_vits.py b/tests/models/vits/test_modeling_vits.py index b83165aff4..791071e096 100644 --- a/tests/models/vits/test_modeling_vits.py +++ b/tests/models/vits/test_modeling_vits.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch VITS model. """ +"""Testing suite for the PyTorch VITS model.""" import copy import os diff --git a/tests/models/vits/test_tokenization_vits.py b/tests/models/vits/test_tokenization_vits.py index fee6bac3a4..a71c5a05c4 100644 --- a/tests/models/vits/test_tokenization_vits.py +++ b/tests/models/vits/test_tokenization_vits.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tests for the VITS tokenizer.""" + import json import os import shutil @@ -87,7 +88,7 @@ class VitsTokenizerTest(TokenizerTesterMixin, unittest.TestCase): # Isolate this from the other tests because we save additional tokens/etc tmpdirname = tempfile.mkdtemp() - sample_text = " He is very happy, UNwant\u00E9d,running" + sample_text = " He is very happy, UNwant\u00e9d,running" before_tokens = tokenizer.encode(sample_text, add_special_tokens=False) before_vocab = tokenizer.get_vocab() tokenizer.save_pretrained(tmpdirname) diff --git a/tests/models/vivit/test_modeling_vivit.py b/tests/models/vivit/test_modeling_vivit.py index cbb45731ae..dd13d0306a 100644 --- a/tests/models/vivit/test_modeling_vivit.py +++ b/tests/models/vivit/test_modeling_vivit.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch ViViT model. """ - +"""Testing suite for the PyTorch ViViT model.""" import copy import inspect diff --git a/tests/models/wav2vec2/test_modeling_wav2vec2.py b/tests/models/wav2vec2/test_modeling_wav2vec2.py index 9d86fb245c..eed419910d 100644 --- a/tests/models/wav2vec2/test_modeling_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_wav2vec2.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Wav2Vec2 model. 
""" +"""Testing suite for the PyTorch Wav2Vec2 model.""" import gc import math diff --git a/tests/models/wav2vec2_bert/test_modeling_wav2vec2_bert.py b/tests/models/wav2vec2_bert/test_modeling_wav2vec2_bert.py index a4a0a95972..8fae3a4bad 100644 --- a/tests/models/wav2vec2_bert/test_modeling_wav2vec2_bert.py +++ b/tests/models/wav2vec2_bert/test_modeling_wav2vec2_bert.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Wav2Vec2-BERT model. """ +"""Testing suite for the PyTorch Wav2Vec2-BERT model.""" + import tempfile import unittest diff --git a/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py b/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py index 5c7bfd0a9b..020f61c82f 100644 --- a/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py +++ b/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py @@ -12,7 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Wav2Vec2-Conformer model. """ +"""Testing suite for the PyTorch Wav2Vec2-Conformer model.""" + import math import tempfile import unittest diff --git a/tests/models/wav2vec2_phoneme/test_tokenization_wav2vec2_phoneme.py b/tests/models/wav2vec2_phoneme/test_tokenization_wav2vec2_phoneme.py index ea81c88ede..d3207cee1d 100644 --- a/tests/models/wav2vec2_phoneme/test_tokenization_wav2vec2_phoneme.py +++ b/tests/models/wav2vec2_phoneme/test_tokenization_wav2vec2_phoneme.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tests for the Wav2Vec2Phoneme tokenizer.""" + import json import os import unittest diff --git a/tests/models/wavlm/test_modeling_wavlm.py b/tests/models/wavlm/test_modeling_wavlm.py index 3cf4348f6c..2e8754d19f 100644 --- a/tests/models/wavlm/test_modeling_wavlm.py +++ b/tests/models/wavlm/test_modeling_wavlm.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch WavLM model. """ +"""Testing suite for the PyTorch WavLM model.""" import math import unittest diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py index 9d1a329934..e2df1b1397 100644 --- a/tests/models/whisper/test_modeling_tf_whisper.py +++ b/tests/models/whisper/test_modeling_tf_whisper.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the TensorFlow Whisper model. """ +"""Testing suite for the TensorFlow Whisper model.""" from __future__ import annotations diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py index 58acb5f2fd..70e37d7649 100644 --- a/tests/models/whisper/test_modeling_whisper.py +++ b/tests/models/whisper/test_modeling_whisper.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch Whisper model. """ +"""Testing suite for the PyTorch Whisper model.""" import copy import inspect diff --git a/tests/models/x_clip/test_modeling_x_clip.py b/tests/models/x_clip/test_modeling_x_clip.py index fc5c1679a6..3f7628a945 100644 --- a/tests/models/x_clip/test_modeling_x_clip.py +++ b/tests/models/x_clip/test_modeling_x_clip.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch XCLIP model. """ - +"""Testing suite for the PyTorch XCLIP model.""" import inspect import os diff --git a/tests/models/xlnet/test_tokenization_xlnet.py b/tests/models/xlnet/test_tokenization_xlnet.py index bd65e6c80b..32dd4685c8 100644 --- a/tests/models/xlnet/test_tokenization_xlnet.py +++ b/tests/models/xlnet/test_tokenization_xlnet.py @@ -154,7 +154,7 @@ class XLNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ".", ], ) - self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["▁he", "ll", "o"]) + self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["▁he", "ll", "o"]) def test_tokenizer_no_lower(self): tokenizer = XLNetTokenizer(SAMPLE_VOCAB, do_lower_case=False) diff --git a/tests/models/yolos/test_modeling_yolos.py b/tests/models/yolos/test_modeling_yolos.py index 9c145388ff..f8edfc7ff8 100644 --- a/tests/models/yolos/test_modeling_yolos.py +++ b/tests/models/yolos/test_modeling_yolos.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch YOLOS model. """ - +"""Testing suite for the PyTorch YOLOS model.""" import unittest diff --git a/tests/models/yoso/test_modeling_yoso.py b/tests/models/yoso/test_modeling_yoso.py index ca41b074bc..f7fa4ea31b 100644 --- a/tests/models/yoso/test_modeling_yoso.py +++ b/tests/models/yoso/test_modeling_yoso.py @@ -12,8 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Testing suite for the PyTorch YOSO model. """ - +"""Testing suite for the PyTorch YOSO model.""" import unittest diff --git a/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py b/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py index fd8b36fc9a..126659f0bc 100644 --- a/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py +++ b/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning the library models for sequence classification on GLUE.""" +"""Finetuning the library models for sequence classification on GLUE.""" # You can also adapt this script on your own text classification task. Pointers for this are left as comments. 
import logging diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index 76402cd092..56814b74f5 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -709,7 +709,7 @@ class TokenizerTesterMixin: # Isolate this from the other tests because we save additional tokens/etc tmpdirname = tempfile.mkdtemp() - sample_text = " He is very happy, UNwant\u00E9d,running" + sample_text = " He is very happy, UNwant\u00e9d,running" before_tokens = tokenizer.encode(sample_text, add_special_tokens=False) before_vocab = tokenizer.get_vocab() tokenizer.save_pretrained(tmpdirname) @@ -728,7 +728,7 @@ class TokenizerTesterMixin: # Isolate this from the other tests because we save additional tokens/etc tmpdirname = tempfile.mkdtemp() - sample_text = " He is very happy, UNwant\u00E9d,running" + sample_text = " He is very happy, UNwant\u00e9d,running" tokenizer.add_tokens(["bim", "bambam"]) additional_special_tokens = tokenizer.additional_special_tokens additional_special_tokens.append("new_additional_special_token") @@ -764,7 +764,7 @@ class TokenizerTesterMixin: # Isolate this from the other tests because we save additional tokens/etc tmpdirname = tempfile.mkdtemp() - sample_text = " He is very happy, UNwant\u00E9d,running" + sample_text = " He is very happy, UNwant\u00e9d,running" tokenizer.add_tokens(["bim", "bambam"]) additional_special_tokens = tokenizer.additional_special_tokens additional_special_tokens.append("new_additional_special_token") diff --git a/tests/tokenization/test_tokenization_utils.py b/tests/tokenization/test_tokenization_utils.py index e5838dd4a3..7ff6b29629 100644 --- a/tests/tokenization/test_tokenization_utils.py +++ b/tests/tokenization/test_tokenization_utils.py @@ -15,6 +15,7 @@ """ isort:skip_file """ + import os import pickle import tempfile diff --git a/utils/add_pipeline_model_mapping_to_test.py b/utils/add_pipeline_model_mapping_to_test.py index ebefcff9af..e67f65f824 100644 --- a/utils/add_pipeline_model_mapping_to_test.py +++ b/utils/add_pipeline_model_mapping_to_test.py @@ -24,7 +24,6 @@ This script will be (mostly) used in the following 2 situations: This script is **NOT** intended to be run (manually) by community contributors. 
""" - import argparse import glob import inspect diff --git a/utils/check_copies.py b/utils/check_copies.py index dd5d5c77da..c4fa2fbaa0 100644 --- a/utils/check_copies.py +++ b/utils/check_copies.py @@ -753,9 +753,9 @@ def is_copy_consistent(filename: str, overwrite: bool = False, buffer: dict = No else: # not in the target --> add it theoretical_code_blocks[f"_ignored_new_block_{ignored_new_block_index}"] = code - name_mappings_1[ + name_mappings_1[f"_ignored_new_block_{ignored_new_block_index}"] = ( f"_ignored_new_block_{ignored_new_block_index}" - ] = f"_ignored_new_block_{ignored_new_block_index}" + ) del observed_code_blocks[name] observed_code_blocks[f"_ignored_new_block_{ignored_new_block_index}"] = code diff --git a/utils/check_doc_toc.py b/utils/check_doc_toc.py index ccbff5e0b6..f90ffc2cc9 100644 --- a/utils/check_doc_toc.py +++ b/utils/check_doc_toc.py @@ -31,7 +31,6 @@ python utils/check_doc_toc.py --fix_and_overwrite ``` """ - import argparse from collections import defaultdict from typing import List diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py index 04572d132b..ed57cd308e 100644 --- a/utils/check_docstrings.py +++ b/utils/check_docstrings.py @@ -32,6 +32,7 @@ python utils/check_docstrings.py --fix_and_overwrite which is used by `make fix-copies` (note that this fills what it cans, you might have to manually fill information like argument descriptions). """ + import argparse import ast import enum diff --git a/utils/check_doctest_list.py b/utils/check_doctest_list.py index f39895ff52..1d763475cb 100644 --- a/utils/check_doctest_list.py +++ b/utils/check_doctest_list.py @@ -30,6 +30,7 @@ Auto-sort the doctest list if it is not properly sorted (used in `make fix-copie python utils/check_doctest_list.py --fix_and_overwrite ``` """ + import argparse import os diff --git a/utils/check_dummies.py b/utils/check_dummies.py index a3ab6ebfa7..e66d69ada1 100644 --- a/utils/check_dummies.py +++ b/utils/check_dummies.py @@ -33,6 +33,7 @@ Update the dummy files if needed (used in `make fix-copies`): python utils/check_dummies.py --fix_and_overwrite ``` """ + import argparse import os import re diff --git a/utils/check_repo.py b/utils/check_repo.py index 13dcd6ad97..dcb1374d8e 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -30,6 +30,7 @@ python utils/check_repo.py It has no auto-fix mode. """ + import inspect import os import re diff --git a/utils/check_support_list.py b/utils/check_support_list.py index 3cb0b61602..89e1bcf9d6 100644 --- a/utils/check_support_list.py +++ b/utils/check_support_list.py @@ -25,6 +25,7 @@ python utils/check_support_list.py It has no auto-fix mode. """ + import os from glob import glob diff --git a/utils/check_table.py b/utils/check_table.py index 9c9318ca85..0866f6bf61 100644 --- a/utils/check_table.py +++ b/utils/check_table.py @@ -31,6 +31,7 @@ python utils/check_inits.py --fix_and_overwrite which is used by `make fix-copies`. 
""" + import argparse import collections import os diff --git a/utils/custom_init_isort.py b/utils/custom_init_isort.py index cdf2aaffdf..7adf804eaf 100644 --- a/utils/custom_init_isort.py +++ b/utils/custom_init_isort.py @@ -34,6 +34,7 @@ For a check only (as used in `make quality`) run: python utils/custom_init_isort.py --check_only ``` """ + import argparse import os import re diff --git a/utils/download_glue_data.py b/utils/download_glue_data.py index ef482d47de..22e9fcae47 100644 --- a/utils/download_glue_data.py +++ b/utils/download_glue_data.py @@ -1,4 +1,4 @@ -""" Script for downloading all GLUE data. +"""Script for downloading all GLUE data. Original source: https://gist.github.com/W4ngatang/60c2bdb54d156a41194446737ce03e2e Note: for legal reasons, we are unable to host MRPC. diff --git a/utils/models_to_deprecate.py b/utils/models_to_deprecate.py index ebdecf22eb..17ea1fd28e 100644 --- a/utils/models_to_deprecate.py +++ b/utils/models_to_deprecate.py @@ -14,6 +14,7 @@ """ Script to find a candidate list of models to deprecate based on the number of downloads and the date of the last commit. """ + import argparse import glob import json diff --git a/utils/release.py b/utils/release.py index dbddba2ac5..b0349a80b4 100644 --- a/utils/release.py +++ b/utils/release.py @@ -41,6 +41,7 @@ python release.py --post_release or use `make post-release`. """ + import argparse import os import re diff --git a/utils/sort_auto_mappings.py b/utils/sort_auto_mappings.py index 8871b6807d..42382b6dfe 100644 --- a/utils/sort_auto_mappings.py +++ b/utils/sort_auto_mappings.py @@ -29,6 +29,7 @@ To only check if the mappings are properly sorted (as used in `make quality`), d python utils/sort_auto_mappings.py --check_only ``` """ + import argparse import os import re diff --git a/utils/update_metadata.py b/utils/update_metadata.py index 2296d2998f..1806eb3f03 100755 --- a/utils/update_metadata.py +++ b/utils/update_metadata.py @@ -28,6 +28,7 @@ that new pipelines are properly added as metadata (as used in `make repo-consist python utils/update_metadata.py --check-only ``` """ + import argparse import collections import os diff --git a/utils/update_tiny_models.py b/utils/update_tiny_models.py index 2e40990691..2cd8f72d56 100644 --- a/utils/update_tiny_models.py +++ b/utils/update_tiny_models.py @@ -20,7 +20,6 @@ version of `tests/utils/tiny_model_summary.json`. That updated file should be me `transformers` so the pipeline testing will use the latest created/updated tiny models. """ - import argparse import copy import json From 8e8786e5f04d7bba787ad7876630c72e20092016 Mon Sep 17 00:00:00 2001 From: Arthur <48595927+ArthurZucker@users.noreply.github.com> Date: Wed, 22 May 2024 10:52:59 +0200 Subject: [PATCH 29/80] Update build ci image [push-ci-image] (#30933) * [build-ci-image] * correct branch * push ci image * [build-ci-image] * update scheduled as well * [push-ci-image] * [build-ci-image] * [push-ci-image] * update deps * [build-ci-image] * [build-ci-image] * [build-ci-image] * [build-ci-image] * [build-ci-image] * [build-ci-image] * oups [build-ci-image] * [push-ci-image] * fix * [build-ci-image] * [build-ci-image] * [build-ci-image] * [build-ci-image] * [build-ci-image] * [build-ci-image] * [build-ci-image] * updated * [build-ci-image] update tag * [build-ci-image] * [build-ci-image] * fix tag * [build-ci-image] * [build-ci-image] * [build-ci-image] * [build-ci-image] * github name * commit_title? 
* fetch * update * it not found * dev * dev * [push-ci-image] * dev * dev * update * dev * dev print dev commit message dev * dev ? dev * dev * dev * dev * dev * [build-ci-image] * [build-ci-image] * [push-ci-image] * revert unwanted * revert convert as well * no you are not important * [build-ci-image] * Update .circleci/config.yml * pin tf probability dev --- .circleci/config.yml | 3 ++- .circleci/create_circleci_config.py | 6 ++++++ .github/workflows/build-ci-docker-images.yml | 18 ++++++++++++++---- docker/consistency.dockerfile | 3 ++- docker/jax-light.dockerfile | 3 ++- docker/pipeline-tf.dockerfile | 3 ++- docker/pipeline-torch.dockerfile | 3 ++- docker/quality.dockerfile | 3 ++- docker/tf-light.dockerfile | 3 ++- docker/torch-jax-light.dockerfile | 3 ++- docker/torch-light.dockerfile | 3 ++- setup.py | 6 +++--- src/transformers/dependency_versions_table.py | 4 ++-- 13 files changed, 43 insertions(+), 18 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index ddb8869a6a..9d04cfb941 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -31,6 +31,7 @@ jobs: steps: - checkout - run: uv pip install -U -e . + - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV" - run: mkdir -p test_preparation - run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt - store_artifacts: @@ -80,7 +81,7 @@ jobs: path: ~/transformers/test_preparation/filtered_test_list.txt - store_artifacts: path: test_preparation/examples_test_list.txt - - run: python .circleci/create_circleci_config.py --fetcher_folder test_preparation + - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation - run: | if [ ! -s test_preparation/generated_config.yml ]; then echo "No tests to run, exiting early!" diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index 8b6a773ee4..1427b08a96 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -72,6 +72,12 @@ class CircleCIJob: if self.docker_image is None: # Let's avoid changing the default list and make a copy. 
self.docker_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE) + else: + # BIG HACK WILL REMOVE ONCE FETCHER IS UPDATED + print(os.environ.get("GIT_COMMIT_MESSAGE")) + if "[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "") or "dev" in os.environ.get("GIT_COMMIT_MESSAGE", ""): + self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev" + print(f"Using {self.docker_image} docker image") if self.install_steps is None: self.install_steps = [] if self.pytest_options is None: diff --git a/.github/workflows/build-ci-docker-images.yml b/.github/workflows/build-ci-docker-images.yml index f7b75a3a30..6d6df220e4 100644 --- a/.github/workflows/build-ci-docker-images.yml +++ b/.github/workflows/build-ci-docker-images.yml @@ -3,7 +3,7 @@ name: Build pr ci-docker on: push: branches: - - change-ci # for now let's only build on this branch + - push-ci-image # for now let's only build on this branch repository_dispatch: workflow_call: inputs: @@ -22,7 +22,7 @@ jobs: build: runs-on: ubuntu-22.04 - if: ${{ contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' }} + if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' || github.event_name == 'schedule' }} strategy: matrix: @@ -30,6 +30,16 @@ jobs: continue-on-error: true steps: + - + name: Set tag + run: | + if [ ${{contains(github.event.head_commit.message, '[build-ci-image]')}} ]; then + echo "TAG=huggingface/transformers-${{ matrix.file }}:dev" >> "$GITHUB_ENV" + echo "setting it to DEV!" + else + echo "TAG=huggingface/transformers-${{ matrix.file }}" >> "$GITHUB_ENV" + + fi - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -50,5 +60,5 @@ jobs: build-args: | REF=${{ github.sha }} file: "./docker/${{ matrix.file }}.dockerfile" - push: true - tags: huggingface/transformers-${{ matrix.file }} \ No newline at end of file + push: ${{ contains(github.event.head_commit.message, 'ci-image]') || github.event_name == 'schedule' }} + tags: ${{ env.TAG }} \ No newline at end of file diff --git a/docker/consistency.dockerfile b/docker/consistency.dockerfile index fa94259e2d..6bdcefc458 100644 --- a/docker/consistency.dockerfile +++ b/docker/consistency.dockerfile @@ -1,12 +1,13 @@ FROM python:3.10-slim ENV PYTHONDONTWRITEBYTECODE=1 USER root +ARG REF=main RUN apt-get update && apt-get install -y time git pkg-config make git-lfs ENV VIRTUAL_ENV=/usr/local RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython RUN uv pip install --no-cache-dir --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu RUN uv pip install --no-cache-dir tensorflow-cpu tf-keras -RUN uv pip install --no-cache-dir "transformers[flax,quality,vision,testing]" +RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,vision,testing]" RUN git lfs install RUN pip uninstall -y transformers diff --git a/docker/jax-light.dockerfile b/docker/jax-light.dockerfile index 8383330628..d9ca5948f9 100644 --- a/docker/jax-light.dockerfile +++ b/docker/jax-light.dockerfile @@ -1,9 +1,10 @@ FROM python:3.10-slim ENV PYTHONDONTWRITEBYTECODE=1 +ARG REF=main USER root RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git g++ cmake ENV VIRTUAL_ENV=/usr/local RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools -RUN pip install --no-cache-dir "scipy<1.13" 
"transformers[flax,testing,sentencepiece,flax-speech,vision]" +RUN pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,testing,sentencepiece,flax-speech,vision]" RUN pip uninstall -y transformers RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean \ No newline at end of file diff --git a/docker/pipeline-tf.dockerfile b/docker/pipeline-tf.dockerfile index 81af0ea9c1..3c83a254c6 100644 --- a/docker/pipeline-tf.dockerfile +++ b/docker/pipeline-tf.dockerfile @@ -1,9 +1,10 @@ FROM python:3.10-slim ENV PYTHONDONTWRITEBYTECODE=1 +ARG REF=main USER root RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake g++ ENV VIRTUAL_ENV=/usr/local RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools -RUN pip install --no-cache-dir "transformers[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]" +RUN pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]" RUN uv pip install --no-cache-dir "protobuf==3.20.3" tensorflow_probability RUN apt-get clean && rm -rf /var/lib/apt/lists/* \ No newline at end of file diff --git a/docker/pipeline-torch.dockerfile b/docker/pipeline-torch.dockerfile index 554d978314..a496777c2c 100644 --- a/docker/pipeline-torch.dockerfile +++ b/docker/pipeline-torch.dockerfile @@ -1,10 +1,11 @@ FROM python:3.10-slim ENV PYTHONDONTWRITEBYTECODE=1 +ARG REF=main USER root RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git ENV VIRTUAL_ENV=/usr/local RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu -RUN uv pip install --no-cache-dir librosa "transformers[sklearn,sentencepiece,vision,testing]" +RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" RUN pip uninstall -y transformers \ No newline at end of file diff --git a/docker/quality.dockerfile b/docker/quality.dockerfile index 471af0526b..631467a599 100644 --- a/docker/quality.dockerfile +++ b/docker/quality.dockerfile @@ -1,8 +1,9 @@ FROM python:3.10-slim ENV PYTHONDONTWRITEBYTECODE=1 +ARG REF=main USER root RUN apt-get update && apt-get install -y time git ENV VIRTUAL_ENV=/usr/local RUN pip install uv && uv venv -RUN uv pip install --no-cache-dir -U pip setuptools GitPython transformers "ruff==0.1.5" urllib3 +RUN uv pip install --no-cache-dir -U pip setuptools GitPython "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ruff]" urllib3 RUN apt-get install -y jq curl && apt-get clean && rm -rf /var/lib/apt/lists/* \ No newline at end of file diff --git a/docker/tf-light.dockerfile b/docker/tf-light.dockerfile index 23dcd40db2..894f55870d 100644 --- a/docker/tf-light.dockerfile +++ b/docker/tf-light.dockerfile @@ -1,11 +1,12 @@ FROM python:3.10-slim ENV PYTHONDONTWRITEBYTECODE=1 +ARG REF=main USER root RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ pkg-config openssh-client git RUN apt-get install -y cmake ENV VIRTUAL_ENV=/usr/local RUN pip 
--no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools -RUN pip install --upgrade --no-cache-dir "transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]" +RUN pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]" RUN uv pip install --no-cache-dir "protobuf==3.20.3" RUN pip uninstall -y transformers RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean \ No newline at end of file diff --git a/docker/torch-jax-light.dockerfile b/docker/torch-jax-light.dockerfile index ef28563ec1..cfdbef788b 100644 --- a/docker/torch-jax-light.dockerfile +++ b/docker/torch-jax-light.dockerfile @@ -1,12 +1,13 @@ FROM python:3.10-slim ENV PYTHONDONTWRITEBYTECODE=1 +ARG REF=main USER root RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git ENV VIRTUAL_ENV=/usr/local RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools RUN uv pip install --no-deps accelerate RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu -RUN pip install --no-cache-dir "scipy<1.13" "transformers[flax, audio, sklearn,sentencepiece,vision,testing]" +RUN pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,audio,sklearn,sentencepiece,vision,testing]" # RUN pip install --no-cache-dir "scipy<1.13" "transformers[flax,testing,sentencepiece,flax-speech,vision]" diff --git a/docker/torch-light.dockerfile b/docker/torch-light.dockerfile index 2172d66ca8..2c169d5e6d 100644 --- a/docker/torch-light.dockerfile +++ b/docker/torch-light.dockerfile @@ -1,10 +1,11 @@ FROM python:3.10-slim ENV PYTHONDONTWRITEBYTECODE=1 +ARG REF=main USER root RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs ENV VIRTUAL_ENV=/usr/local RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu -RUN uv pip install --no-cache-dir librosa "transformers[sklearn,sentencepiece,vision,testing]" +RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" RUN pip uninstall -y transformers \ No newline at end of file diff --git a/setup.py b/setup.py index 9ad7c84b58..3d6c78fd9a 100644 --- a/setup.py +++ b/setup.py @@ -173,10 +173,10 @@ _deps = [ "tensorflow-cpu>2.9,<2.16", "tensorflow>2.9,<2.16", "tensorflow-text<2.16", - "tensorflow-probability<2.16", + "tensorflow-probability<0.24", "tf2onnx", "timeout-decorator", - "timm", + "timm<=0.9.16", "tokenizers>=0.19,<0.20", "torch", "torchaudio", @@ -339,7 +339,7 @@ extras["testing"] = ( ) extras["deepspeed-testing"] = extras["deepspeed"] + extras["testing"] + extras["optuna"] + extras["sentencepiece"] - +extras["ruff"] = deps_list("ruff") extras["quality"] = deps_list("datasets", "isort", "ruff", "GitPython", "urllib3") extras["all"] = ( diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index 
2e48650b9c..29c916aff6 100644 --- a/src/transformers/dependency_versions_table.py +++ b/src/transformers/dependency_versions_table.py @@ -78,10 +78,10 @@ deps = { "tensorflow-cpu": "tensorflow-cpu>2.9,<2.16", "tensorflow": "tensorflow>2.9,<2.16", "tensorflow-text": "tensorflow-text<2.16", - "tensorflow-probability": "tensorflow-probability<2.16", + "tensorflow-probability": "tensorflow-probability<0.24", "tf2onnx": "tf2onnx", "timeout-decorator": "timeout-decorator", - "timm": "timm", + "timm": "timm<=0.9.16", "tokenizers": "tokenizers>=0.19,<0.20", "torch": "torch", "torchaudio": "torchaudio", From edb14eba641bbbfc4170b4341475ed0e80748e49 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 22 May 2024 11:27:41 +0100 Subject: [PATCH 30/80] Bump requests from 2.31.0 to 2.32.2 in /examples/research_projects/lxmert (#30956) --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/research_projects/lxmert/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/research_projects/lxmert/requirements.txt b/examples/research_projects/lxmert/requirements.txt index 60a1858b4e..1a3c80e086 100644 --- a/examples/research_projects/lxmert/requirements.txt +++ b/examples/research_projects/lxmert/requirements.txt @@ -75,7 +75,7 @@ pyzmq==19.0.2 qtconsole==4.7.7 QtPy==1.9.0 regex==2020.7.14 -requests==2.31.0 +requests==2.32.2 retrying==1.3.3 sacremoses==0.0.43 Send2Trash==1.5.0 From 934e1b84e956ea74323b92eb86300126b8139a42 Mon Sep 17 00:00:00 2001 From: Raushan Turganbay Date: Wed, 22 May 2024 16:56:41 +0500 Subject: [PATCH 31/80] Update video-llava docs (#30935) * update video-llava * Update docs/source/en/model_doc/video_llava.md Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --- docs/source/en/model_doc/video_llava.md | 96 ++++++++++++++++--- .../video_llava/modeling_video_llava.py | 14 +-- 2 files changed, 91 insertions(+), 19 deletions(-) diff --git a/docs/source/en/model_doc/video_llava.md b/docs/source/en/model_doc/video_llava.md index 7f27e65a4e..307c55bb2c 100644 --- a/docs/source/en/model_doc/video_llava.md +++ b/docs/source/en/model_doc/video_llava.md @@ -42,21 +42,28 @@ a unified visual representation, outperforming models designed specifically for work to provide modest insights into the multi-modal inputs for the LLM* -Tips: +## Usage tips: - We advise users to use padding_side="left" when computing batched generation as it leads to more accurate results. Simply make sure to call processor.tokenizer.padding_side = "left" before generating. - Note the model has not been explicitly trained to process multiple images/videos in the same prompt, although this is technically possible, you may experience inaccurate results. -- For better results, we recommend users prompt the model with the correct prompt format: +- Note that the video inputs should have exactly 8 frames at the input, since the models were trained in that setting. +This model was contributed by [RaushanTurganbay](https://huggingface.co/RaushanTurganbay). +The original code can be found [here](https://github.com/PKU-YuanGroup/Video-LLaVA). + + +## Usage example + +### Single Media Mode + +The model can accept both images and videos as input. 
Here's example code for inference in half-precision (`torch.float16`): ```python import av import torch import numpy as np -import requests -from PIL import Image from transformers import VideoLlavaForConditionalGeneration, VideoLlavaProcessor def read_video_pyav(container, indices): @@ -79,36 +86,99 @@ def read_video_pyav(container, indices): frames.append(frame) return np.stack([x.to_ndarray(format="rgb24") for x in frames]) - -model = VideoLlavaForConditionalGeneration.from_pretrained("LanguageBind/Video-LLaVA-7B-hf", device_map="auto") +# Load the model in half-precision +model = VideoLlavaForConditionalGeneration.from_pretrained("LanguageBind/Video-LLaVA-7B-hf", torch_dtype=torch.float16, device_map="auto") processor = VideoLlavaProcessor.from_pretrained("LanguageBind/Video-LLaVA-7B-hf") +# Load the video as an np.array, uniformly sampling 8 frames video_path = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="sample_demo_1.mp4", repo_type="dataset") - container = av.open(video_path) total_frames = container.streams.video[0].frames indices = np.arange(0, total_frames, total_frames / 8).astype(int) video = read_video_pyav(container, indices) +# For better results, we recommend prompting the model in the following format prompt = "USER: 