Fix template (#9697)
parent 14042d560f
commit 7251a4736d
modeling_tf_roberta.py

@@ -307,7 +307,7 @@ class TFRobertaPooler(tf.keras.layers.Layer):
         return pooled_output


-# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfAttention
+# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfAttention with Bert->Roberta
 class TFRobertaSelfAttention(tf.keras.layers.Layer):
     def __init__(self, config, **kwargs):
         super().__init__(**kwargs)
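For context, the `with Bert->Roberta` suffix added here is directive syntax for the library's copy-consistency checker (`utils/check_copies.py`): the tool reads the referenced Bert code, applies the requested renaming, and fails CI if the Roberta copy has drifted. A minimal sketch of that idea, with a hypothetical `is_consistent_copy` helper standing in for the real, more thorough utility:

```python
import re


def is_consistent_copy(original_src: str, copied_src: str, pattern: str = "") -> bool:
    """Sketch of a ``# Copied from`` check: apply the optional ``with Old->New``
    renaming to the original code, then require an exact match with the copy."""
    if pattern:
        old, new = (part.strip() for part in pattern.split("->"))
        # One regex pass over the casing variants, so freshly substituted text
        # (e.g. the "bert" hiding inside "Roberta") is never replaced again.
        mapping = {old: new, old.lower(): new.lower(), old.upper(): new.upper()}
        original_src = re.sub(
            "|".join(map(re.escape, mapping)), lambda m: mapping[m.group(0)], original_src
        )
    return original_src == copied_src


# With the suffix, the Bert source must match after renaming; without it,
# the two bodies must match verbatim.
assert is_consistent_copy(
    "class TFBertSelfAttention(tf.keras.layers.Layer):",
    "class TFRobertaSelfAttention(tf.keras.layers.Layer):",
    pattern="Bert->Roberta",
)
```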
@@ -355,7 +355,7 @@ class TFRobertaSelfAttention(tf.keras.layers.Layer):
         attention_scores = tf.einsum("aecd,abcd->acbe", key_layer, query_layer)

         if attention_mask is not None:
-            # Apply the attention mask is (precomputed for all layers in TFBertModel call() function)
+            # Apply the attention mask is (precomputed for all layers in TFRobertaModel call() function)
             attention_scores = attention_scores + attention_mask

         # Normalize the attention scores to probabilities.
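For reference, the einsum in the context lines contracts query and key over the head dimension: with `key_layer` laid out as `(batch, key_len, heads, head_dim)` and `query_layer` as `(batch, query_len, heads, head_dim)`, `"aecd,abcd->acbe"` yields per-head scores of shape `(batch, heads, query_len, key_len)`, and the mask is then added to the logits rather than multiplied. A standalone sketch with made-up shapes:

```python
import tensorflow as tf

batch, seq_len, num_heads, head_dim = 2, 4, 3, 5  # illustrative sizes only
query_layer = tf.random.normal((batch, seq_len, num_heads, head_dim))  # a,b,c,d
key_layer = tf.random.normal((batch, seq_len, num_heads, head_dim))    # a,e,c,d

# Contract over head_dim (d): raw scores of shape (batch, heads, q_len, k_len).
attention_scores = tf.einsum("aecd,abcd->acbe", key_layer, query_layer)
print(attention_scores.shape)  # (2, 3, 4, 4)

# The additive mask is 0 for visible tokens and a large negative number for
# padding, so masked positions vanish after the softmax.
attention_mask = tf.concat(
    [tf.zeros((batch, 1, 1, seq_len - 1)), tf.fill((batch, 1, 1, 1), -10000.0)], axis=-1
)
attention_probs = tf.nn.softmax(attention_scores + attention_mask, axis=-1)
```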
@@ -375,7 +375,7 @@ class TFRobertaSelfAttention(tf.keras.layers.Layer):
         return outputs


-# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfOutput with Bert->Roberta
+# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfOutput
 class TFRobertaSelfOutput(tf.keras.layers.Layer):
     def __init__(self, config, **kwargs):
         super().__init__(**kwargs)
modeling_tf_{{cookiecutter.lowercase_modelname}}.py

@@ -241,7 +241,7 @@ class TF{{cookiecutter.camelcase_modelname}}Embeddings(tf.keras.layers.Layer):



-# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfAttention
+# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfAttention with Bert->{{cookiecutter.camelcase_modelname}}
 class TF{{cookiecutter.camelcase_modelname}}SelfAttention(tf.keras.layers.Layer):
     def __init__(self, config, **kwargs):
         super().__init__(**kwargs)
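The `{{cookiecutter.camelcase_modelname}}` placeholders are template variables that cookiecutter (which renders with Jinja2) substitutes when a new model skeleton is generated, so this template change propagates the same comment fix to every future model. A small illustration using Jinja2 directly, with a made-up model name in place of the values the real cookiecutter prompts supply:

```python
from jinja2 import Template

line = Template(
    "# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfAttention "
    "with Bert->{{cookiecutter.camelcase_modelname}}"
)
# Hypothetical context; the real values come from the cookiecutter prompts.
print(line.render(cookiecutter={"camelcase_modelname": "NewModel"}))
# -> # Copied from ...TFBertSelfAttention with Bert->NewModel
```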
@@ -309,7 +309,7 @@ class TF{{cookiecutter.camelcase_modelname}}SelfAttention(tf.keras.layers.Layer):
         return outputs


-# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfOutput with Bert->{{cookiecutter.camelcase_modelname}}
+# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfOutput
 class TF{{cookiecutter.camelcase_modelname}}SelfOutput(tf.keras.layers.Layer):
     def __init__(self, config, **kwargs):
         super().__init__(**kwargs)
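Note that the SelfOutput fixes run in the opposite direction of the SelfAttention ones: the `with Bert->...` suffix is dropped, presumably because the copied body contains no Bert-specific identifiers, so a verbatim match is what the checker should enforce. For illustration, a sketch of that copied block's shape (paraphrased from `TFBertSelfOutput` of the same era, not copied from this commit; dense projection, dropout, then a residual LayerNorm):

```python
import tensorflow as tf
from transformers.modeling_tf_utils import get_initializer


class TFSelfOutputSketch(tf.keras.layers.Layer):
    """Paraphrase of the copied block: nothing in the body is Bert-specific."""

    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        self.dense = tf.keras.layers.Dense(
            units=config.hidden_size,
            kernel_initializer=get_initializer(config.initializer_range),
            name="dense",
        )
        self.LayerNorm = tf.keras.layers.LayerNormalization(
            epsilon=config.layer_norm_eps, name="LayerNorm"
        )
        self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)

    def call(self, hidden_states, input_tensor, training=False):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states, training=training)
        # Residual connection followed by LayerNorm, as in the original block.
        return self.LayerNorm(hidden_states + input_tensor)
```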