Prevent MLflow exception from disrupting training (#28779)
Changed MLflow metric logging from synchronous to asynchronous.
Co-authored-by: codiceSpaghetti <alessio.ser@hotmail.it>
This commit is contained in:
parent
d703eaaeff
commit
a937425e94
|
@@ -1036,7 +1036,7 @@ class MLflowCallback(TrainerCallback):
|
|||
f'Trainer is attempting to log a value of "{v}" of type {type(v)} for key "{k}" as a metric. '
|
||||
"MLflow's log_metric() only accepts float and int types so we dropped this attribute."
|
||||
)
|
||||
self._ml_flow.log_metrics(metrics=metrics, step=state.global_step)
|
||||
self._ml_flow.log_metrics(metrics=metrics, step=state.global_step, synchronous=False)
|
||||
|
||||
def on_train_end(self, args, state, control, **kwargs):
|
||||
if self._initialized and state.is_world_process_zero:
|
||||
|
|
Loading…
Reference in New Issue