Fix small use_cache typo in the docs (#19191)
parent 2df602870b
commit 942fa8ced8
```diff
@@ -475,7 +475,7 @@ class TFGenerationMixin:
                 [What are attention masks?](../glossary#attention-mask)
             decoder_start_token_id (`int`, *optional*):
                 If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token.
-            use_cache: (`bool`, *optional*, defaults to `True`):
+            use_cache (`bool`, *optional*, defaults to `True`):
                 Whether or not the model should use the past last key/values attentions (if applicable to the model) to
                 speed up decoding.
             output_attentions (`bool`, *optional*, defaults to `False`):
```
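For context, the argument being renamed here is the `use_cache` flag accepted by `generate()`. A minimal sketch of how it is typically passed on the TensorFlow side; the `gpt2` checkpoint and the prompt are illustrative choices, not part of this commit:

```python
from transformers import AutoTokenizer, TFAutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = TFAutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The quick brown fox", return_tensors="tf")

# use_cache=True (the default) reuses the past key/value attentions, so each
# decoding step only computes attention for the newest token.
outputs = model.generate(**inputs, max_new_tokens=20, use_cache=True)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```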
```diff
@@ -1033,7 +1033,7 @@ class GenerationMixin:
                 as `input_ids` that masks the pad token. [What are attention masks?](../glossary#attention-mask)
             decoder_start_token_id (`int`, *optional*):
                 If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token.
-            use_cache: (`bool`, *optional*, defaults to `True`):
+            use_cache (`bool`, *optional*, defaults to `True`):
                 Whether or not the model should use the past last key/values attentions (if applicable to the model) to
                 speed up decoding.
             num_beam_groups (`int`, *optional*, defaults to `model.config.num_beam_groups` or 1 if the config does not set any value):
```
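The same flag exists on the PyTorch `GenerationMixin`. A hedged sketch of turning the cache off, again assuming the `gpt2` checkpoint purely for illustration:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The quick brown fox", return_tensors="pt")

# Disabling the cache forces attention to be recomputed over the full
# sequence at every step: slower, but no past key/values are stored.
outputs = model.generate(**inputs, max_new_tokens=20, use_cache=False)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```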