From 9acce7de1cb8229304a467938ebb47727d60cdb2 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Wed, 13 Mar 2024 09:16:59 +0100 Subject: [PATCH] Core: Fix copies on main (#29624) fix fix copies --- src/transformers/models/gptj/modeling_gptj.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/gptj/modeling_gptj.py b/src/transformers/models/gptj/modeling_gptj.py index c495d281db..144dbba055 100644 --- a/src/transformers/models/gptj/modeling_gptj.py +++ b/src/transformers/models/gptj/modeling_gptj.py @@ -454,7 +454,7 @@ class GPTJFlashAttention2(GPTJAttention): attention_mask (`torch.Tensor`): The padding mask - corresponds to a tensor of size `(batch_size, seq_len)` where 0 stands for the position of padding tokens and 1 for the position of non-padding tokens. - dropout (`int`, *optional*): + dropout (`float`): Attention dropout softmax_scale (`float`, *optional*): The scaling of QK^T before applying softmax. Default to 1 / sqrt(head_dim)