Correcting comments in T5Stack to reflect correct tuple order (#11330)
* Correcting comments to reflect correct tuple order

  To match the actual tuple order (built on lines 513 and 516, and accessed on line 968), I've changed the order given in the comments on lines 962 and 966-967.

* Update modeling_t5.py

  Updating another comment as well

* Removing extra space

* Fixing style and quality

* style & quality

* Update src/transformers/models/t5/modeling_t5.py

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
This commit is contained in:
parent 0b93358447
commit e6126e1932
@@ -701,7 +701,7 @@ class T5Block(nn.Module):
         else:
             outputs = outputs + attention_outputs
 
-        return outputs  # hidden-states, present_key_value_states, (self-attention weights), (self-attention position bias), (cross-attention weights), (cross-attention position bias)
+        return outputs  # hidden-states, present_key_value_states, (self-attention position bias), (self-attention weights), (cross-attention position bias), (cross-attention weights)
 
 
 class T5PreTrainedModel(PreTrainedModel):
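For readers checking the ordering: T5Block appends the self-attention extras before the cross-attention extras, and within each pair the position bias precedes the attention weights. A minimal standalone sketch of the corrected layout for a decoder block with use_cache=True and output_attentions=True, using placeholder strings instead of tensors (the names below are illustrative, not the actual modeling_t5.py objects):

# Sketch: corrected output layout of a T5 decoder block.
# Strings stand in for real tensors.
layer_outputs = (
    "hidden-states",                    # index 0
    "present_key_value_states",         # index 1
    "(self-attention position bias)",   # index 2 -- bias comes BEFORE weights
    "(self-attention weights)",         # index 3
    "(cross-attention position bias)",  # index 4
    "(cross-attention weights)",        # index 5
)

# This is the order T5Stack depends on when reusing the biases:
position_bias = layer_outputs[2]                  # self-attention position bias
encoder_decoder_position_bias = layer_outputs[4]  # cross-attention position bias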
@@ -1009,14 +1009,15 @@ class T5Stack(T5PreTrainedModel):
                 )
 
             # layer_outputs is a tuple with:
-            # hidden-states, key-value-states, (self-attention weights), (self-attention position bias), (cross-attention weights), (cross-attention position bias)
+            # hidden-states, key-value-states, (self-attention position bias), (self-attention weights), (cross-attention position bias), (cross-attention weights)
             if use_cache is False:
                 layer_outputs = layer_outputs[:1] + (None,) + layer_outputs[1:]
 
             hidden_states, present_key_value_state = layer_outputs[:2]
 
             # We share the position biases between the layers - the first layer store them
-            # layer_outputs = hidden-states, key-value-states (self-attention weights),
-            # (self-attention position bias), (cross-attention weights), (cross-attention position bias)
+            # layer_outputs = hidden-states, key-value-states (self-attention position bias), (self-attention weights),
+            # (cross-attention position bias), (cross-attention weights)
             position_bias = layer_outputs[2]
             if self.is_decoder and encoder_hidden_states is not None:
                 encoder_decoder_position_bias = layer_outputs[4 if output_attentions else 3]
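The conditional index in the last line is a consequence of the layout above: with use_cache=False the block omits present_key_value_states and T5Stack splices a None back in, and with output_attentions=False the attention-weight entries disappear, shifting the cross-attention position bias from index 4 to index 3. A toy reproduction of that bookkeeping, again with placeholder strings rather than the real tensors:

from itertools import product

# Rebuild the block output tuple the way the corrected comments describe it,
# for every combination of use_cache and output_attentions.
for use_cache, output_attentions in product([True, False], repeat=2):
    layer_outputs = ("hidden-states",)
    if use_cache:
        layer_outputs += ("present_key_value_states",)
    layer_outputs += ("self-attention position bias",)
    if output_attentions:
        layer_outputs += ("self-attention weights",)
    layer_outputs += ("cross-attention position bias",)
    if output_attentions:
        layer_outputs += ("cross-attention weights",)

    # Same normalization T5Stack applies, so the later indices stay stable.
    if use_cache is False:
        layer_outputs = layer_outputs[:1] + (None,) + layer_outputs[1:]

    position_bias = layer_outputs[2]
    encoder_decoder_position_bias = layer_outputs[4 if output_attentions else 3]
    assert position_bias == "self-attention position bias"
    assert encoder_decoder_position_bias == "cross-attention position bias"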