Prevent MLflow exception from disrupting training (#28779)

Switched MLflow metric logging from synchronous to asynchronous

Co-authored-by: codiceSpaghetti <alessio.ser@hotmail.it>
This commit is contained in:
Alessio Serra 2024-01-31 02:10:44 +01:00 committed by GitHub
parent d703eaaeff
commit a937425e94
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 1 addition and 1 deletion

View File

@@ -1036,7 +1036,7 @@ class MLflowCallback(TrainerCallback):
f'Trainer is attempting to log a value of "{v}" of type {type(v)} for key "{k}" as a metric. '
"MLflow's log_metric() only accepts float and int types so we dropped this attribute."
)
self._ml_flow.log_metrics(metrics=metrics, step=state.global_step)
self._ml_flow.log_metrics(metrics=metrics, step=state.global_step, synchronous=False)
def on_train_end(self, args, state, control, **kwargs):
if self._initialized and state.is_world_process_zero: