diff --git a/examples/lm_finetuning/finetune_on_pregenerated.py b/examples/lm_finetuning/finetune_on_pregenerated.py
index 1638b02a6f..400be6cdd2 100644
--- a/examples/lm_finetuning/finetune_on_pregenerated.py
+++ b/examples/lm_finetuning/finetune_on_pregenerated.py
@@ -315,7 +315,7 @@ def main():
                     if args.fp16:
                         # modify learning rate with special warm up BERT uses
                         # if args.fp16 is False, BertAdam is used that handles this automatically
-                        lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                        lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
                                                                                  args.warmup_proportion)
                         for param_group in optimizer.param_groups:
                             param_group['lr'] = lr_this_step
diff --git a/examples/lm_finetuning/simple_lm_finetuning.py b/examples/lm_finetuning/simple_lm_finetuning.py
index 6511ead590..da01daf4f0 100644
--- a/examples/lm_finetuning/simple_lm_finetuning.py
+++ b/examples/lm_finetuning/simple_lm_finetuning.py
@@ -603,7 +603,7 @@ def main():
                     if args.fp16:
                         # modify learning rate with special warm up BERT uses
                         # if args.fp16 is False, BertAdam is used that handles this automatically
-                        lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                        lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
                                                                                  args.warmup_proportion)
                         for param_group in optimizer.param_groups:
                             param_group['lr'] = lr_this_step
diff --git a/examples/run_classifier.py b/examples/run_classifier.py
index 89ab96f50a..17d8f72c37 100644
--- a/examples/run_classifier.py
+++ b/examples/run_classifier.py
@@ -854,7 +854,7 @@ def main():
                     if args.fp16:
                         # modify learning rate with special warm up BERT uses
                         # if args.fp16 is False, BertAdam is used that handles this automatically
-                        lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                        lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
                                                                                  args.warmup_proportion)
                         for param_group in optimizer.param_groups:
                             param_group['lr'] = lr_this_step
diff --git a/examples/run_squad.py b/examples/run_squad.py
index c3fdf03774..c7b93ac1dd 100644
--- a/examples/run_squad.py
+++ b/examples/run_squad.py
@@ -1015,7 +1015,7 @@ def main():
                     if args.fp16:
                         # modify learning rate with special warm up BERT uses
                         # if args.fp16 is False, BertAdam is used and handles this automatically
-                        lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                        lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
                                                                                  args.warmup_proportion)
                         for param_group in optimizer.param_groups:
                             param_group['lr'] = lr_this_step
diff --git a/examples/run_swag.py b/examples/run_swag.py
index 89f4bdf868..eb5c869553 100644
--- a/examples/run_swag.py
+++ b/examples/run_swag.py
@@ -466,7 +466,7 @@ def main():
                     if args.fp16:
                         # modify learning rate with special warm up BERT uses
                         # if args.fp16 is False, BertAdam is used that handles this automatically
-                        lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                        lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
                                                                                  args.warmup_proportion)
                         for param_group in optimizer.param_groups:
                             param_group['lr'] = lr_this_step
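
In all five example scripts the fp16 branch stops dividing global_step by num_train_optimization_steps at the call site and passes the raw step to warmup_linear.get_lr, i.e. the schedule is now expected to normalize the step against the total step count itself. The snippet below is only a minimal sketch of that contract, not the library's actual class: the name ToyWarmupLinearSchedule, its constructor arguments, the exact warmup/decay formula, and the sample values (0.1, 1000, 250, 3e-5) are all assumptions made for illustration.

    # Illustrative only -- not the schedule class from the library; it just shows
    # a get_lr() that accepts the raw optimizer step and normalizes it internally,
    # which is what the updated call sites above rely on.
    class ToyWarmupLinearSchedule:
        def __init__(self, warmup, t_total):
            self.warmup = warmup      # fraction of training spent warming up, e.g. 0.1
            self.t_total = t_total    # total number of optimization steps

        def get_lr(self, step):
            progress = step / self.t_total          # normalization now lives here, not at the call site
            if progress < self.warmup:
                return progress / self.warmup       # linear warmup from 0 to 1
            return max((progress - 1.0) / (self.warmup - 1.0), 0.0)  # linear decay back to 0

    # Hypothetical usage mirroring the fp16 branch in the scripts:
    schedule = ToyWarmupLinearSchedule(warmup=0.1, t_total=1000)
    global_step = 250
    lr_this_step = 3e-5 * schedule.get_lr(global_step)   # peak learning rate * schedule factor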