From 36b60ce9e88656e10d1dbef2762a0b3540c37a5f Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Sat, 28 Nov 2020 19:50:49 +0100
Subject: [PATCH] fix mt5 config (#8832)

---
 src/transformers/models/mt5/configuration_mt5.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/transformers/models/mt5/configuration_mt5.py b/src/transformers/models/mt5/configuration_mt5.py
index 09e9ac2262..79a20e3264 100644
--- a/src/transformers/models/mt5/configuration_mt5.py
+++ b/src/transformers/models/mt5/configuration_mt5.py
@@ -60,6 +60,8 @@ class MT5Config(PretrainedConfig):
             testing).
         feed_forward_proj (:obj:`string`, `optional`, defaults to :obj:`"gated-gelu"`):
             Type of feed forward layer to be used. Should be one of :obj:`"relu"` or :obj:`"gated-gelu"`.
+        use_cache (:obj:`bool`, `optional`, defaults to :obj:`True`):
+            Whether or not the model should return the last key/values attentions (not used by all models).
     """
     model_type = "mt5"
     keys_to_ignore_at_inference = ["past_key_values"]
@@ -79,6 +81,7 @@ class MT5Config(PretrainedConfig):
         initializer_factor=1.0,
         feed_forward_proj="gated-gelu",
         is_encoder_decoder=True,
+        use_cache=True,
         tokenizer_class="T5Tokenizer",
         tie_word_embeddings=False,
         pad_token_id=0,
@@ -109,6 +112,7 @@ class MT5Config(PretrainedConfig):
         self.layer_norm_epsilon = layer_norm_epsilon
         self.initializer_factor = initializer_factor
         self.feed_forward_proj = feed_forward_proj
+        self.use_cache = use_cache
 
     @property
     def hidden_size(self):
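
Not part of the patch itself: a minimal sketch of how the newly exposed use_cache flag on MT5Config might be toggled once a transformers version containing this change is installed; the specific values shown are illustrative assumptions, not taken from the commit.

from transformers import MT5Config

# Default configuration keeps use_cache=True, so past key/value states are
# returned and can be reused during autoregressive generation.
config = MT5Config()
print(config.use_cache)  # True

# The flag can be switched off at construction time, e.g. when cached
# key/value states are not wanted.
config_no_cache = MT5Config(use_cache=False)
print(config_no_cache.use_cache)  # False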