Fix windows err with checkpoint race conditions (#28637)

Fix windows err
This commit is contained in:
Zach Mueller 2024-01-23 14:30:36 +01:00 committed by Amy Roberts
parent b94f5fdd7e
commit 3001543b94
1 changed file with 5 additions and 3 deletions

View File

@@ -2415,9 +2415,11 @@ class Trainer:
os.rename(staging_output_dir, output_dir)
# Ensure rename completed in cases where os.rename is not atomic
fd = os.open(output_dir, os.O_RDONLY)
os.fsync(fd)
os.close(fd)
# And can only happen on non-windows based systems
if os.name != "nt":
fd = os.open(output_dir, os.O_RDONLY)
os.fsync(fd)
os.close(fd)
# Maybe delete some older checkpoints.
if self.args.should_save: