Removed the division of global_step by num_train_optimization_steps in the lr_this_step computation.
This commit is contained in:
parent
3ae8c8be1e
commit
5289b4b9e0
|
@ -315,7 +315,7 @@ def main():
|
|||
if args.fp16:
|
||||
# modify learning rate with special warm up BERT uses
|
||||
# if args.fp16 is False, BertAdam is used that handles this automatically
|
||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
|
||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
|
||||
args.warmup_proportion)
|
||||
for param_group in optimizer.param_groups:
|
||||
param_group['lr'] = lr_this_step
|
||||
|
|
|
@ -603,7 +603,7 @@ def main():
|
|||
if args.fp16:
|
||||
# modify learning rate with special warm up BERT uses
|
||||
# if args.fp16 is False, BertAdam is used that handles this automatically
|
||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
|
||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
|
||||
args.warmup_proportion)
|
||||
for param_group in optimizer.param_groups:
|
||||
param_group['lr'] = lr_this_step
|
||||
|
|
|
@ -854,7 +854,7 @@ def main():
|
|||
if args.fp16:
|
||||
# modify learning rate with special warm up BERT uses
|
||||
# if args.fp16 is False, BertAdam is used that handles this automatically
|
||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
|
||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
|
||||
args.warmup_proportion)
|
||||
for param_group in optimizer.param_groups:
|
||||
param_group['lr'] = lr_this_step
|
||||
|
|
|
@ -1015,7 +1015,7 @@ def main():
|
|||
if args.fp16:
|
||||
# modify learning rate with special warm up BERT uses
|
||||
# if args.fp16 is False, BertAdam is used and handles this automatically
|
||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
|
||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
|
||||
args.warmup_proportion)
|
||||
for param_group in optimizer.param_groups:
|
||||
param_group['lr'] = lr_this_step
|
||||
|
|
|
@ -466,7 +466,7 @@ def main():
|
|||
if args.fp16:
|
||||
# modify learning rate with special warm up BERT uses
|
||||
# if args.fp16 is False, BertAdam is used that handles this automatically
|
||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
|
||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
|
||||
args.warmup_proportion)
|
||||
for param_group in optimizer.param_groups:
|
||||
param_group['lr'] = lr_this_step
|
||||
|
|
Loading…
Reference in New Issue