FSDP bug fix for `load_state_dict` (#18596)
This commit is contained in:
parent
d344534bf6
commit
4eed2beca0
|
@ -1935,7 +1935,9 @@ class Trainer:
|
||||||
else:
|
else:
|
||||||
# We load the model state dict on the CPU to avoid an OOM error.
|
# We load the model state dict on the CPU to avoid an OOM error.
|
||||||
state_dict = torch.load(os.path.join(resume_from_checkpoint, WEIGHTS_NAME), map_location="cpu")
|
state_dict = torch.load(os.path.join(resume_from_checkpoint, WEIGHTS_NAME), map_location="cpu")
|
||||||
load_result = model.load_state_dict(state_dict, strict=False)
|
# workaround for FSDP bug https://github.com/pytorch/pytorch/issues/82963
|
||||||
|
# (FSDP's `load_state_dict` takes *args instead of **kwargs, so `strict` must be passed positionally)
|
||||||
|
load_result = model.load_state_dict(state_dict, False)
|
||||||
# release memory
|
# release memory
|
||||||
del state_dict
|
del state_dict
|
||||||
self._issue_warnings_after_load(load_result)
|
self._issue_warnings_after_load(load_result)
|
||||||
|
@ -1989,7 +1991,9 @@ class Trainer:
|
||||||
# We load the model state dict on the CPU to avoid an OOM error.
|
# We load the model state dict on the CPU to avoid an OOM error.
|
||||||
state_dict = torch.load(best_model_path, map_location="cpu")
|
state_dict = torch.load(best_model_path, map_location="cpu")
|
||||||
# If the model is on the GPU, it still works!
|
# If the model is on the GPU, it still works!
|
||||||
load_result = model.load_state_dict(state_dict, strict=False)
|
# workaround for FSDP bug https://github.com/pytorch/pytorch/issues/82963
|
||||||
|
# (FSDP's `load_state_dict` takes *args instead of **kwargs, so `strict` must be passed positionally)
|
||||||
|
load_result = model.load_state_dict(state_dict, False)
|
||||||
if not is_sagemaker_mp_enabled():
|
if not is_sagemaker_mp_enabled():
|
||||||
self._issue_warnings_after_load(load_result)
|
self._issue_warnings_after_load(load_result)
|
||||||
elif os.path.exists(os.path.join(self.state.best_model_checkpoint, WEIGHTS_INDEX_NAME)):
|
elif os.path.exists(os.path.join(self.state.best_model_checkpoint, WEIGHTS_INDEX_NAME)):
|
||||||
|
|
Loading…
Reference in New Issue