TF Flaubert w/ pre-norm (#6841)
This commit is contained in:
parent
4561f05c5f
commit
895d394669
|
@ -296,7 +296,7 @@ class TFFlaubertMainLayer(TFXLMMainLayer):
|
|||
else:
|
||||
tensor_normalized = self.layer_norm1[i](tensor)
|
||||
attn_outputs = self.attentions[i](
|
||||
tensor_normalized, attn_mask, None, cache, head_mask[i], training=training
|
||||
tensor_normalized, attn_mask, None, cache, head_mask[i], output_attentions, training=training
|
||||
)
|
||||
attn = attn_outputs[0]
|
||||
if output_attentions:
|
||||
|
|
Loading…
Reference in New Issue