Fix annotations (#24582)
* fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations * fix annotations
This commit is contained in:
parent
c817bc44e2
commit
232c898f9f
|
@ -735,7 +735,7 @@ class AutoformerEncoderLayer(nn.Module):
|
|||
) -> Tuple[torch.FloatTensor, Optional[torch.FloatTensor]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -321,7 +321,7 @@ class TFBartEncoderLayer(tf.keras.layers.Layer):
|
|||
) -> tf.Tensor:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
@ -394,11 +394,11 @@ class TFBartDecoderLayer(tf.keras.layers.Layer):
|
|||
) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
encoder_hidden_states (`tf.Tensor`):
|
||||
- cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
encoder_attention_mask (`tf.Tensor`): encoder attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -1388,7 +1388,7 @@ class BigBirdPegasusEncoderLayer(nn.Module):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
output_attentions (`bool`, *optional*):
|
||||
|
|
|
@ -317,7 +317,7 @@ class TFBlenderbotEncoderLayer(tf.keras.layers.Layer):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
@ -391,11 +391,11 @@ class TFBlenderbotDecoderLayer(tf.keras.layers.Layer):
|
|||
) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
encoder_hidden_states (`tf.Tensor`):
|
||||
- cross attention input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ cross attention input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
encoder_attention_mask (`tf.Tensor`): encoder attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -317,7 +317,7 @@ class TFBlenderbotSmallEncoderLayer(tf.keras.layers.Layer):
|
|||
) -> tf.Tensor:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
@ -391,11 +391,11 @@ class TFBlenderbotSmallDecoderLayer(tf.keras.layers.Layer):
|
|||
) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
encoder_hidden_states (`tf.Tensor`):
|
||||
- cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
encoder_attention_mask (`tf.Tensor`): encoder attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -795,7 +795,7 @@ class ConditionalDetrEncoderLayer(nn.Module):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
`(batch, 1, target_len, source_len)` where padding elements are indicated by very large negative
|
||||
values.
|
||||
|
|
|
@ -851,7 +851,7 @@ class DetaDecoderLayer(nn.Module):
|
|||
"""
|
||||
Args:
|
||||
hidden_states (`torch.FloatTensor`):
|
||||
- Input to the layer of shape `(seq_len, batch, embed_dim)`.
|
||||
+ Input to the layer of shape `(batch, seq_len, embed_dim)`.
|
||||
position_embeddings (`torch.FloatTensor`, *optional*):
|
||||
Position embeddings that are added to the queries and keys in the self-attention layer.
|
||||
reference_points (`torch.FloatTensor`, *optional*):
|
||||
|
@ -861,7 +861,7 @@ class DetaDecoderLayer(nn.Module):
|
|||
level_start_index (`torch.LongTensor`, *optional*):
|
||||
Level start index.
|
||||
encoder_hidden_states (`torch.FloatTensor`):
|
||||
- cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
|
||||
`(batch, 1, target_len, source_len)` where padding elements are indicated by very large negative
|
||||
values.
|
||||
|
|
|
@ -642,7 +642,7 @@ class DetrEncoderLayer(nn.Module):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
`(batch, 1, target_len, source_len)` where padding elements are indicated by very large negative
|
||||
values.
|
||||
|
@ -723,7 +723,7 @@ class DetrDecoderLayer(nn.Module):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
`(batch, 1, target_len, source_len)` where padding elements are indicated by very large negative
|
||||
values.
|
||||
|
@ -734,7 +734,7 @@ class DetrDecoderLayer(nn.Module):
|
|||
position embeddings that are added to the queries and keys
|
||||
in the self-attention layer.
|
||||
encoder_hidden_states (`torch.FloatTensor`):
|
||||
- cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
|
||||
`(batch, 1, target_len, source_len)` where padding elements are indicated by very large negative
|
||||
values.
|
||||
|
|
|
@ -738,7 +738,7 @@ class InformerEncoderLayer(nn.Module):
|
|||
) -> Tuple[torch.FloatTensor, Optional[torch.FloatTensor]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -962,7 +962,7 @@ class LEDEncoderLayer(nn.Module):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
|
||||
|
@ -1040,11 +1040,11 @@ class LEDDecoderLayer(nn.Module):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
encoder_hidden_states (`torch.FloatTensor`):
|
||||
- cross attention input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ cross attention input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -1181,7 +1181,7 @@ class TFLEDEncoderLayer(tf.keras.layers.Layer):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
@ -1256,11 +1256,11 @@ class TFLEDDecoderLayer(tf.keras.layers.Layer):
|
|||
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
encoder_hidden_states (`tf.Tensor`):
|
||||
- cross attention input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ cross attention input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
encoder_attention_mask (`tf.Tensor`): encoder attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -354,7 +354,7 @@ class TFMarianEncoderLayer(tf.keras.layers.Layer):
|
|||
) -> tf.Tensor:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
@ -428,11 +428,11 @@ class TFMarianDecoderLayer(tf.keras.layers.Layer):
|
|||
) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
encoder_hidden_states (`tf.Tensor`):
|
||||
- cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
encoder_attention_mask (`tf.Tensor`): encoder attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -571,7 +571,7 @@ class DetrDecoderLayer(nn.Module):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
`(batch, 1, target_len, source_len)` where padding elements are indicated by very large negative
|
||||
values.
|
||||
|
@ -582,7 +582,7 @@ class DetrDecoderLayer(nn.Module):
|
|||
position embeddings that are added to the queries and keys
|
||||
in the self-attention layer.
|
||||
encoder_hidden_states (`torch.FloatTensor`):
|
||||
- cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
|
||||
`(batch, 1, target_len, source_len)` where padding elements are indicated by very large negative
|
||||
values.
|
||||
|
|
|
@ -322,7 +322,7 @@ class TFMBartEncoderLayer(tf.keras.layers.Layer):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
@ -395,11 +395,11 @@ class TFMBartDecoderLayer(tf.keras.layers.Layer):
|
|||
) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
encoder_hidden_states (`tf.Tensor`):
|
||||
- cross attention input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ cross attention input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
encoder_attention_mask (`tf.Tensor`): encoder attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -327,7 +327,7 @@ class MvpEncoderLayer(nn.Module):
|
|||
) -> Tuple[torch.FloatTensor, Optional[torch.FloatTensor]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -672,7 +672,7 @@ class NllbMoeEncoderLayer(nn.Module):
|
|||
"""
|
||||
Args:
|
||||
hidden_states (`torch.FloatTensor`):
|
||||
- input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`torch.FloatTensor`):
|
||||
attention mask of size `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very
|
||||
large negative values.
|
||||
|
|
|
@ -303,7 +303,7 @@ class TFOPTDecoderLayer(tf.keras.layers.Layer):
|
|||
) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`tf.Tensor`, *optional*): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`, *optional*): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -356,7 +356,7 @@ class TFPegasusEncoderLayer(tf.keras.layers.Layer):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
@ -430,11 +430,11 @@ class TFPegasusDecoderLayer(tf.keras.layers.Layer):
|
|||
) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
encoder_hidden_states (`tf.Tensor`):
|
||||
- cross attention input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ cross attention input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
encoder_attention_mask (`tf.Tensor`): encoder attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -400,7 +400,7 @@ class TFSpeech2TextEncoderLayer(tf.keras.layers.Layer):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
@ -477,11 +477,11 @@ class TFSpeech2TextDecoderLayer(tf.keras.layers.Layer):
|
|||
) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
encoder_hidden_states (`tf.Tensor`):
|
||||
- cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
encoder_attention_mask (`tf.Tensor`): encoder attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -345,11 +345,11 @@ class Speech2Text2DecoderLayer(nn.Module):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
encoder_hidden_states (`torch.FloatTensor`):
|
||||
- cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -584,7 +584,7 @@ class TableTransformerEncoderLayer(nn.Module):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
`(batch, 1, target_len, source_len)` where padding elements are indicated by very large negative
|
||||
values.
|
||||
|
@ -668,7 +668,7 @@ class TableTransformerDecoderLayer(nn.Module):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
`(batch, 1, target_len, source_len)` where padding elements are indicated by very large negative
|
||||
values.
|
||||
|
@ -679,7 +679,7 @@ class TableTransformerDecoderLayer(nn.Module):
|
|||
position embeddings that are added to the queries and keys
|
||||
in the self-attention layer.
|
||||
encoder_hidden_states (`torch.FloatTensor`):
|
||||
- cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
|
||||
`(batch, 1, target_len, source_len)` where padding elements are indicated by very large negative
|
||||
values.
|
||||
|
|
|
@ -358,11 +358,11 @@ class TrOCRDecoderLayer(nn.Module):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
encoder_hidden_states (`torch.FloatTensor`):
|
||||
- cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -313,7 +313,7 @@ class TFWhisperEncoderLayer(tf.keras.layers.Layer):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
@ -391,11 +391,11 @@ class TFWhisperDecoderLayer(tf.keras.layers.Layer):
|
|||
) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
encoder_hidden_states (`tf.Tensor`):
|
||||
- cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
|
||||
+ cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
|
||||
encoder_attention_mask (`tf.Tensor`): encoder attention mask of size
|
||||
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -348,11 +348,11 @@ class TFXGLMDecoderLayer(tf.keras.layers.Layer):
|
|||
) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
encoder_hidden_states (`tf.Tensor`):
|
||||
- cross attention input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ cross attention input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
encoder_attention_mask (`tf.Tensor`): encoder attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -1794,7 +1794,7 @@ class TF{{cookiecutter.camelcase_modelname}}EncoderLayer(tf.keras.layers.Layer):
|
|||
def call(self, hidden_states: tf.Tensor, attention_mask: tf.Tensor, layer_head_mask: tf.Tensor, training=False):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
@ -1867,10 +1867,10 @@ class TF{{cookiecutter.camelcase_modelname}}DecoderLayer(tf.keras.layers.Layer):
|
|||
) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`tf.Tensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`tf.Tensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`tf.Tensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
- encoder_hidden_states (`tf.Tensor`): cross attention input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ encoder_hidden_states (`tf.Tensor`): cross attention input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
encoder_attention_mask (`tf.Tensor`): encoder attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`tf.Tensor`): mask for attention heads in a given layer of size
|
||||
|
|
|
@ -1826,7 +1826,7 @@ class {{cookiecutter.camelcase_modelname}}EncoderLayer(nn.Module):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
|
||||
|
@ -1907,10 +1907,10 @@ class {{cookiecutter.camelcase_modelname}}DecoderLayer(nn.Module):
|
|||
):
|
||||
"""
|
||||
Args:
|
||||
- hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ hidden_states (`torch.FloatTensor`): input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
attention_mask (`torch.FloatTensor`): attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
- encoder_hidden_states (`torch.FloatTensor`): cross attention input to the layer of shape *(seq_len, batch, embed_dim)*
|
||||
+ encoder_hidden_states (`torch.FloatTensor`): cross attention input to the layer of shape *(batch, seq_len, embed_dim)*
|
||||
encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
|
||||
*(batch, 1, tgt_len, src_len)* where padding elements are indicated by very large negative values.
|
||||
layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
|
||||
|
|
Loading…
Reference in New Issue