[trainer] sanity checks for `save_steps=0|None` and `logging_steps=0` (#12796)

* [trainer] fix % 0 * sanity checks * fix logging_strategy * correction * Update src/transformers/training_args.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
2021-07-20 09:05:26 -07:00 · 2021-07-20 09:05:26 -07:00 · cabcc75171
parent acdd78db08
commit cabcc75171
3 changed files with 16 additions and 7 deletions
--- a/src/transformers/trainer_callback.py
+++ b/src/transformers/trainer_callback.py
@ -403,11 +403,7 @@ class DefaultFlowCallback(TrainerCallback):
        # Log
        if state.global_step == 1 and args.logging_first_step:
            control.should_log = True
-        if (
-            args.logging_strategy == IntervalStrategy.STEPS
-            and args.logging_steps > 0
-            and state.global_step % args.logging_steps == 0
-        ):
+        if args.logging_strategy == IntervalStrategy.STEPS and state.global_step % args.logging_steps == 0:
            control.should_log = True

        # Evaluate
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@ -663,8 +663,20 @@ class TrainingArguments:
        self.lr_scheduler_type = SchedulerType(self.lr_scheduler_type)
        if self.do_eval is False and self.evaluation_strategy != IntervalStrategy.NO:
            self.do_eval = True
-        if self.eval_steps is None:
-            self.eval_steps = self.logging_steps
+
+        # eval_steps has to be defined and non-zero, fallbacks to logging_steps if the latter is non-zero
+        if self.evaluation_strategy == IntervalStrategy.STEPS and (self.eval_steps is None or self.eval_steps == 0):
+            if self.logging_steps > 0:
+                logger.info(f"using `logging_steps` to initialize `eval_steps` to {self.logging_steps}")
+                self.eval_steps = self.logging_steps
+            else:
+                raise ValueError(
+                    f"evaluation strategy {self.evaluation_strategy} requires either non-zero --eval_steps or --logging_steps"
+                )
+
+        # logging_steps must be non-zero for logging_strategy that is other than 'no'
+        if self.logging_strategy == IntervalStrategy.STEPS and self.logging_steps == 0:
+            raise ValueError(f"logging strategy {self.logging_strategy} requires non-zero --logging_steps")

        # Sanity checks for load_best_model_at_end: we require save and eval strategies to be compatible.
        if self.load_best_model_at_end:
--- a/tests/extended/test_trainer_ext.py
+++ b/tests/extended/test_trainer_ext.py
@ -248,6 +248,7 @@ class TestTrainerExt(TestCasePlus):
            --learning_rate {learning_rate}
            --warmup_steps 8
            --logging_steps 0
+            --logging_strategy no
            --save_steps {str(eval_steps)}
            --group_by_length
            --label_smoothing_factor 0.1