Add `cosine_with_min_lr` scheduler in Trainer (#29341)

* Add cosine_with_min_lr scheduler

* Update error message for missing min_lr or min_lr_rate
Yanyi Liu 2024-03-26 20:57:07 +08:00 committed by GitHub
parent 998b5bb56f
commit ef60995858
3 changed files with 88 additions and 0 deletions
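
For orientation, a minimal usage sketch of the new scheduler as a user would select it through `TrainingArguments` (mirroring the test added in this PR; the model and dataset are placeholders supplied by the caller):

from transformers import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="./regression",               # placeholder output directory
    lr_scheduler_type="cosine_with_min_lr",  # scheduler type added by this PR
    lr_scheduler_kwargs={"min_lr": 1e-5},    # or {"min_lr_rate": 0.05}; set exactly one of the two
    learning_rate=0.2,
    warmup_steps=2,
)
# trainer = Trainer(model=model, args=args, train_dataset=train_dataset)  # model/train_dataset provided by the user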


@@ -324,6 +324,69 @@ def get_inverse_sqrt_schedule(
    return LambdaLR(optimizer, lr_lambda, last_epoch=last_epoch)


def _get_cosine_schedule_with_warmup_lr_lambda(
    current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_cycles: float, min_lr_rate: float = 0.0
):
    if current_step < num_warmup_steps:
        return float(current_step) / float(max(1, num_warmup_steps))
    progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
    factor = 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))
    factor = factor * (1 - min_lr_rate) + min_lr_rate
    return max(0, factor)


def get_cosine_with_min_lr_schedule_with_warmup(
    optimizer: Optimizer,
    num_warmup_steps: int,
    num_training_steps: int,
    num_cycles: float = 0.5,
    last_epoch: int = -1,
    min_lr: float = None,
    min_lr_rate: float = None,
):
    """
    Create a schedule with a learning rate that decreases following the values of the cosine function between the
    initial lr set in the optimizer and min_lr, after a warmup period during which it increases linearly between 0
    and the initial lr set in the optimizer.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        num_cycles (`float`, *optional*, defaults to 0.5):
            The number of waves in the cosine schedule (the default is to just decrease from the max value to 0
            following a half-cosine).
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
        min_lr (`float`, *optional*):
            The minimum learning rate to reach after the cosine schedule.
        min_lr_rate (`float`, *optional*):
            The minimum learning rate as a ratio of the initial learning rate. If set, `min_lr` should not be set.

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """
    if min_lr is not None and min_lr_rate is not None:
        raise ValueError("Only one of min_lr or min_lr_rate should be set")
    elif min_lr is not None:
        min_lr_rate = min_lr / optimizer.defaults["lr"]
    elif min_lr_rate is None:
        raise ValueError("One of min_lr or min_lr_rate should be set through the `lr_scheduler_kwargs`")

    lr_lambda = partial(
        _get_cosine_schedule_with_warmup_lr_lambda,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps,
        num_cycles=num_cycles,
        min_lr_rate=min_lr_rate,
    )
    return LambdaLR(optimizer, lr_lambda, last_epoch)


TYPE_TO_SCHEDULER_FUNCTION = {
    SchedulerType.LINEAR: get_linear_schedule_with_warmup,
    SchedulerType.COSINE: get_cosine_schedule_with_warmup,
@@ -333,6 +396,7 @@ TYPE_TO_SCHEDULER_FUNCTION = {
    SchedulerType.CONSTANT_WITH_WARMUP: get_constant_schedule_with_warmup,
    SchedulerType.INVERSE_SQRT: get_inverse_sqrt_schedule,
    SchedulerType.REDUCE_ON_PLATEAU: get_reduce_on_plateau_schedule,
    SchedulerType.COSINE_WITH_MIN_LR: get_cosine_with_min_lr_schedule_with_warmup,
}
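
For reference, a hedged sketch of calling the new schedule function directly with a plain torch optimizer, assuming it is exposed from `transformers.optimization` alongside the other schedule helpers; once the training steps are exhausted the learning rate should settle at `min_lr`:

import torch

from transformers.optimization import get_cosine_with_min_lr_schedule_with_warmup

optimizer = torch.optim.SGD(torch.nn.Linear(2, 1).parameters(), lr=0.2)
scheduler = get_cosine_with_min_lr_schedule_with_warmup(
    optimizer,
    num_warmup_steps=2,
    num_training_steps=10,
    min_lr=1e-5,  # equivalent to min_lr_rate=5e-5 for an initial lr of 0.2
)
for _ in range(10):
    optimizer.step()
    scheduler.step()
print(scheduler.get_last_lr()[0])  # ~1e-5 after the cosine has fully decayed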


@@ -408,6 +408,7 @@ class SchedulerType(ExplicitEnum):
    CONSTANT_WITH_WARMUP = "constant_with_warmup"
    INVERSE_SQRT = "inverse_sqrt"
    REDUCE_ON_PLATEAU = "reduce_lr_on_plateau"
    COSINE_WITH_MIN_LR = "cosine_with_min_lr"
class TrainerMemoryTracker:
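
Since the new enum member is wired into `TYPE_TO_SCHEDULER_FUNCTION`, the scheduler can also be requested by name through `get_scheduler`; a hedged sketch below, assuming the existing `scheduler_specific_kwargs` argument that the Trainer forwards `lr_scheduler_kwargs` into:

from transformers.optimization import get_scheduler

# `optimizer` as in the previous sketch; "cosine_with_min_lr" resolves to SchedulerType.COSINE_WITH_MIN_LR
scheduler = get_scheduler(
    "cosine_with_min_lr",
    optimizer=optimizer,
    num_warmup_steps=2,
    num_training_steps=10,
    scheduler_specific_kwargs={"min_lr_rate": 0.05},  # assumed pass-through for min_lr / min_lr_rate
)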


@@ -706,6 +706,29 @@ class TrainerIntegrationPrerunTest(TestCasePlus, TrainerIntegrationCommon):
        self.assertEqual(sched1.lr_lambdas[0].args, sched2.lr_lambdas[0].args)
        self.assertEqual(sched1.lr_lambdas[0].keywords, sched2.lr_lambdas[0].keywords)

    def test_cosine_with_min_lr_scheduler(self):
        train_dataset = RegressionDataset()
        model = RegressionModel()
        num_steps, num_warmup_steps = 10, 2
        extra_kwargs = {"min_lr": 1e-5}  # Non-default arguments
        args = TrainingArguments(
            "./regression",
            lr_scheduler_type="cosine_with_min_lr",
            lr_scheduler_kwargs=extra_kwargs,
            learning_rate=0.2,
            warmup_steps=num_warmup_steps,
        )
        trainer = Trainer(model, args, train_dataset=train_dataset)
        trainer.create_optimizer_and_scheduler(num_training_steps=num_steps)

        # Checking that the scheduler was created
        self.assertIsNotNone(trainer.lr_scheduler)

        # Check the last learning rate
        for _ in range(num_steps):
            trainer.lr_scheduler.step()
        self.assertEqual(trainer.lr_scheduler.get_last_lr()[0], 1e-5)

    def test_reduce_lr_on_plateau_args(self):
        # test passed arguments for a custom ReduceLROnPlateau scheduler
        train_dataset = RegressionDataset(length=64)