diff --git a/src/transformers/models/blip_2/modeling_blip_2.py b/src/transformers/models/blip_2/modeling_blip_2.py
index 935e041eb8..edd0d9a6d7 100644
--- a/src/transformers/models/blip_2/modeling_blip_2.py
+++ b/src/transformers/models/blip_2/modeling_blip_2.py
@@ -1194,9 +1194,13 @@ class Blip2Model(Blip2PreTrainedModel):
 
         self.language_projection = nn.Linear(config.qformer_config.hidden_size, config.text_config.hidden_size)
         if config.use_decoder_only_language_model:
-            language_model = AutoModelForCausalLM.from_config(config.text_config)
+            language_model = AutoModelForCausalLM.from_config(
+                config.text_config, attn_implementation=config._attn_implementation
+            )
         else:
-            language_model = AutoModelForSeq2SeqLM.from_config(config.text_config)
+            language_model = AutoModelForSeq2SeqLM.from_config(
+                config.text_config, attn_implementation=config._attn_implementation
+            )
 
         # Update _tied_weights_keys using the base model used.
         if language_model._tied_weights_keys is not None:
@@ -1549,9 +1553,13 @@ class Blip2ForConditionalGeneration(Blip2PreTrainedModel):
 
         self.language_projection = nn.Linear(config.qformer_config.hidden_size, config.text_config.hidden_size)
         if config.use_decoder_only_language_model:
-            language_model = AutoModelForCausalLM.from_config(config.text_config)
+            language_model = AutoModelForCausalLM.from_config(
+                config.text_config, attn_implementation=config._attn_implementation
+            )
         else:
-            language_model = AutoModelForSeq2SeqLM.from_config(config.text_config)
+            language_model = AutoModelForSeq2SeqLM.from_config(
+                config.text_config, attn_implementation=config._attn_implementation
+            )
 
         # Update _tied_weights_keys using the base model used.
         if language_model._tied_weights_keys is not None:
diff --git a/src/transformers/models/depth_anything/modeling_depth_anything.py b/src/transformers/models/depth_anything/modeling_depth_anything.py
index 788b0d9113..bed91ac2a4 100644
--- a/src/transformers/models/depth_anything/modeling_depth_anything.py
+++ b/src/transformers/models/depth_anything/modeling_depth_anything.py
@@ -367,7 +367,9 @@ class DepthAnythingForDepthEstimation(DepthAnythingPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
-        self.backbone = AutoBackbone.from_config(config.backbone_config)
+        self.backbone = AutoBackbone.from_config(
+            config.backbone_config, attn_implementation=config._attn_implementation
+        )
         self.neck = DepthAnythingNeck(config)
         self.head = DepthAnythingDepthEstimationHead(config)
 
diff --git a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py
index 16248fee64..2b185cc14a 100644
--- a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py
+++ b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py
@@ -209,12 +209,12 @@ class EncoderDecoderModel(PreTrainedModel):
         if encoder is None:
             from ..auto.modeling_auto import AutoModel
 
-            encoder = AutoModel.from_config(config.encoder)
+            encoder = AutoModel.from_config(config.encoder, attn_implementation=config._attn_implementation)
 
         if decoder is None:
             from ..auto.modeling_auto import AutoModelForCausalLM
 
-            decoder = AutoModelForCausalLM.from_config(config.decoder)
+            decoder = AutoModelForCausalLM.from_config(config.decoder, attn_implementation=config._attn_implementation)
 
         self.encoder = encoder
         self.decoder = decoder
diff --git a/src/transformers/models/fuyu/modeling_fuyu.py b/src/transformers/models/fuyu/modeling_fuyu.py
index 8e9a41954a..bdaec5f868 100644
--- a/src/transformers/models/fuyu/modeling_fuyu.py
+++ b/src/transformers/models/fuyu/modeling_fuyu.py
@@ -149,7 +149,9 @@ class FuyuForCausalLM(FuyuPreTrainedModel):
         super().__init__(config)
         self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
-        self.language_model = AutoModelForCausalLM.from_config(config.text_config)
+        self.language_model = AutoModelForCausalLM.from_config(
+            config.text_config, attn_implementation=config._attn_implementation
+        )
 
         self.vision_embed_tokens = nn.Linear(
             config.patch_size * config.patch_size * config.num_channels, config.hidden_size
diff --git a/src/transformers/models/idefics2/modeling_idefics2.py b/src/transformers/models/idefics2/modeling_idefics2.py
index 28cd615554..7f61e95a9b 100644
--- a/src/transformers/models/idefics2/modeling_idefics2.py
+++ b/src/transformers/models/idefics2/modeling_idefics2.py
@@ -1476,7 +1476,7 @@ class Idefics2Model(Idefics2PreTrainedModel):
 
         self.vision_model = Idefics2VisionTransformer(config.vision_config)
         self.connector = Idefics2Connector(config)
-        self.text_model = AutoModel.from_config(config.text_config)
+        self.text_model = AutoModel.from_config(config.text_config, attn_implementation=config._attn_implementation)
 
         self.image_seq_len = config.perceiver_config.resampler_n_latents
         self.image_token_id = self.config.image_token_id
diff --git a/src/transformers/models/instructblip/modeling_instructblip.py b/src/transformers/models/instructblip/modeling_instructblip.py
index b18d467231..52f8fa610a 100644
--- a/src/transformers/models/instructblip/modeling_instructblip.py
+++ b/src/transformers/models/instructblip/modeling_instructblip.py
@@ -1251,9 +1251,13 @@ class InstructBlipForConditionalGeneration(InstructBlipPreTrainedModel):
 
         self.language_projection = nn.Linear(config.qformer_config.hidden_size, config.text_config.hidden_size)
         if config.use_decoder_only_language_model:
-            language_model = AutoModelForCausalLM.from_config(config.text_config)
+            language_model = AutoModelForCausalLM.from_config(
+                config.text_config, attn_implementation=config._attn_implementation
+            )
         else:
-            language_model = AutoModelForSeq2SeqLM.from_config(config.text_config)
+            language_model = AutoModelForSeq2SeqLM.from_config(
+                config.text_config, attn_implementation=config._attn_implementation
+            )
 
         if language_model._no_split_modules is not None:
             self._no_split_modules.extend(language_model._no_split_modules)
diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py
index 80dec5bc3d..8a6f959a49 100644
--- a/src/transformers/models/rag/modeling_rag.py
+++ b/src/transformers/models/rag/modeling_rag.py
@@ -506,12 +506,16 @@ class RagModel(RagPreTrainedModel):
         if question_encoder is None:
             from ..auto.modeling_auto import AutoModel
 
-            question_encoder = AutoModel.from_config(config.question_encoder)
+            question_encoder = AutoModel.from_config(
+                config.question_encoder, attn_implementation=config._attn_implementation
+            )
 
         if generator is None:
             from ..auto.modeling_auto import AutoModelForSeq2SeqLM
 
-            generator = AutoModelForSeq2SeqLM.from_config(config.generator)
+            generator = AutoModelForSeq2SeqLM.from_config(
+                config.generator, attn_implementation=config._attn_implementation
+            )
 
         self.retriever = retriever
         if self.retriever is not None:
diff --git a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py
index 942dfb5f9c..77b69afe8f 100644
--- a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py
+++ b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py
@@ -212,10 +212,10 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
         super().__init__(config)
 
         if encoder is None:
-            encoder = AutoModel.from_config(config.encoder)
+            encoder = AutoModel.from_config(config.encoder, attn_implementation=config._attn_implementation)
 
         if decoder is None:
-            decoder = AutoModelForCausalLM.from_config(config.decoder)
+            decoder = AutoModelForCausalLM.from_config(config.decoder, attn_implementation=config._attn_implementation)
 
         self.encoder = encoder
         self.decoder = decoder
diff --git a/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py
index 0bdf760441..fc72eb1cbd 100644
--- a/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py
+++ b/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py
@@ -190,10 +190,10 @@ class VisionEncoderDecoderModel(PreTrainedModel):
         super().__init__(config)
 
         if encoder is None:
-            encoder = AutoModel.from_config(config.encoder)
+            encoder = AutoModel.from_config(config.encoder, attn_implementation=config._attn_implementation)
 
         if decoder is None:
-            decoder = AutoModelForCausalLM.from_config(config.decoder)
+            decoder = AutoModelForCausalLM.from_config(config.decoder, attn_implementation=config._attn_implementation)
 
         self.encoder = encoder
         self.decoder = decoder
diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py
index cd4d5bd7a1..0f82bdd0c3 100755
--- a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py
+++ b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py
@@ -185,10 +185,12 @@ class VisionTextDualEncoderModel(PreTrainedModel):
             if isinstance(config.vision_config, CLIPVisionConfig):
                 vision_model = CLIPVisionModel(config.vision_config)
             else:
-                vision_model = AutoModel.from_config(config.vision_config)
+                vision_model = AutoModel.from_config(
+                    config.vision_config, attn_implementation=config._attn_implementation
+                )
 
         if text_model is None:
-            text_model = AutoModel.from_config(config.text_config)
+            text_model = AutoModel.from_config(config.text_config, attn_implementation=config._attn_implementation)
 
         self.vision_model = vision_model
         self.text_model = text_model
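Taken together, these edits make composite models forward the attention backend requested on the top-level config to the sub-models they build with from_config, instead of letting each sub-model fall back to its own default dispatch. A minimal sketch of how the effect could be observed, assuming a BLIP-2 checkpoint; the checkpoint name, the chosen backend, and the printed private attribute are illustrative and not part of the diff:

from transformers import Blip2ForConditionalGeneration

# Request an explicit attention backend on the composite model.
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b",
    attn_implementation="eager",  # any backend the sub-models support
)

# With the propagation above, the inner language model should be built with
# the same backend as the top-level model rather than its own default.
print(model.config._attn_implementation)
print(model.language_model.config._attn_implementation)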