Fix training schedules in examples to match the new API
This commit is contained in:
parent
c36cca075a
commit
d94c6b0144
|
@ -14,7 +14,7 @@ from tqdm import tqdm
|
||||||
|
|
||||||
from pytorch_pretrained_bert.modeling import BertForPreTraining
|
from pytorch_pretrained_bert.modeling import BertForPreTraining
|
||||||
from pytorch_pretrained_bert.tokenization import BertTokenizer
|
from pytorch_pretrained_bert.tokenization import BertTokenizer
|
||||||
from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
|
from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
|
||||||
|
|
||||||
InputFeatures = namedtuple("InputFeatures", "input_ids input_mask segment_ids lm_label_ids is_next")
|
InputFeatures = namedtuple("InputFeatures", "input_ids input_mask segment_ids lm_label_ids is_next")
|
||||||
|
|
||||||
|
@ -268,7 +268,8 @@ def main():
|
||||||
optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
|
optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
|
||||||
else:
|
else:
|
||||||
optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
|
optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
|
||||||
|
warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
|
||||||
|
t_total=num_train_optimization_steps)
|
||||||
else:
|
else:
|
||||||
optimizer = BertAdam(optimizer_grouped_parameters,
|
optimizer = BertAdam(optimizer_grouped_parameters,
|
||||||
lr=args.learning_rate,
|
lr=args.learning_rate,
|
||||||
|
@ -314,7 +315,7 @@ def main():
|
||||||
if args.fp16:
|
if args.fp16:
|
||||||
# modify learning rate with special warm up BERT uses
|
# modify learning rate with special warm up BERT uses
|
||||||
# if args.fp16 is False, BertAdam is used that handles this automatically
|
# if args.fp16 is False, BertAdam is used that handles this automatically
|
||||||
lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps,
|
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
|
||||||
args.warmup_proportion)
|
args.warmup_proportion)
|
||||||
for param_group in optimizer.param_groups:
|
for param_group in optimizer.param_groups:
|
||||||
param_group['lr'] = lr_this_step
|
param_group['lr'] = lr_this_step
|
||||||
|
|
|
@ -31,7 +31,7 @@ from tqdm import tqdm, trange
|
||||||
|
|
||||||
from pytorch_pretrained_bert.modeling import BertForPreTraining
|
from pytorch_pretrained_bert.modeling import BertForPreTraining
|
||||||
from pytorch_pretrained_bert.tokenization import BertTokenizer
|
from pytorch_pretrained_bert.tokenization import BertTokenizer
|
||||||
from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
|
from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
|
||||||
|
|
||||||
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
|
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
|
||||||
datefmt='%m/%d/%Y %H:%M:%S',
|
datefmt='%m/%d/%Y %H:%M:%S',
|
||||||
|
@ -556,6 +556,8 @@ def main():
|
||||||
optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
|
optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
|
||||||
else:
|
else:
|
||||||
optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
|
optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
|
||||||
|
warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
|
||||||
|
t_total=num_train_optimization_steps)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
optimizer = BertAdam(optimizer_grouped_parameters,
|
optimizer = BertAdam(optimizer_grouped_parameters,
|
||||||
|
@ -601,7 +603,8 @@ def main():
|
||||||
if args.fp16:
|
if args.fp16:
|
||||||
# modify learning rate with special warm up BERT uses
|
# modify learning rate with special warm up BERT uses
|
||||||
# if args.fp16 is False, BertAdam is used that handles this automatically
|
# if args.fp16 is False, BertAdam is used that handles this automatically
|
||||||
lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
|
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
|
||||||
|
args.warmup_proportion)
|
||||||
for param_group in optimizer.param_groups:
|
for param_group in optimizer.param_groups:
|
||||||
param_group['lr'] = lr_this_step
|
param_group['lr'] = lr_this_step
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
|
|
@ -38,7 +38,7 @@ from sklearn.metrics import matthews_corrcoef, f1_score
|
||||||
from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
|
from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
|
||||||
from pytorch_pretrained_bert.modeling import BertForSequenceClassification, BertConfig
|
from pytorch_pretrained_bert.modeling import BertForSequenceClassification, BertConfig
|
||||||
from pytorch_pretrained_bert.tokenization import BertTokenizer
|
from pytorch_pretrained_bert.tokenization import BertTokenizer
|
||||||
from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
|
from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -784,6 +784,8 @@ def main():
|
||||||
optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
|
optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
|
||||||
else:
|
else:
|
||||||
optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
|
optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
|
||||||
|
warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
|
||||||
|
t_total=num_train_optimization_steps)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
optimizer = BertAdam(optimizer_grouped_parameters,
|
optimizer = BertAdam(optimizer_grouped_parameters,
|
||||||
|
@ -852,7 +854,8 @@ def main():
|
||||||
if args.fp16:
|
if args.fp16:
|
||||||
# modify learning rate with special warm up BERT uses
|
# modify learning rate with special warm up BERT uses
|
||||||
# if args.fp16 is False, BertAdam is used that handles this automatically
|
# if args.fp16 is False, BertAdam is used that handles this automatically
|
||||||
lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
|
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
|
||||||
|
args.warmup_proportion)
|
||||||
for param_group in optimizer.param_groups:
|
for param_group in optimizer.param_groups:
|
||||||
param_group['lr'] = lr_this_step
|
param_group['lr'] = lr_this_step
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
|
|
@ -36,7 +36,7 @@ from tqdm import tqdm, trange
|
||||||
|
|
||||||
from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
|
from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
|
||||||
from pytorch_pretrained_bert.modeling import BertForQuestionAnswering, BertConfig
|
from pytorch_pretrained_bert.modeling import BertForQuestionAnswering, BertConfig
|
||||||
from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
|
from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
|
||||||
from pytorch_pretrained_bert.tokenization import (BasicTokenizer,
|
from pytorch_pretrained_bert.tokenization import (BasicTokenizer,
|
||||||
BertTokenizer,
|
BertTokenizer,
|
||||||
whitespace_tokenize)
|
whitespace_tokenize)
|
||||||
|
@ -949,6 +949,8 @@ def main():
|
||||||
optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
|
optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
|
||||||
else:
|
else:
|
||||||
optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
|
optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
|
||||||
|
warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
|
||||||
|
t_total=num_train_optimization_steps)
|
||||||
else:
|
else:
|
||||||
optimizer = BertAdam(optimizer_grouped_parameters,
|
optimizer = BertAdam(optimizer_grouped_parameters,
|
||||||
lr=args.learning_rate,
|
lr=args.learning_rate,
|
||||||
|
@ -1013,7 +1015,8 @@ def main():
|
||||||
if args.fp16:
|
if args.fp16:
|
||||||
# modify learning rate with special warm up BERT uses
|
# modify learning rate with special warm up BERT uses
|
||||||
# if args.fp16 is False, BertAdam is used and handles this automatically
|
# if args.fp16 is False, BertAdam is used and handles this automatically
|
||||||
lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
|
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
|
||||||
|
args.warmup_proportion)
|
||||||
for param_group in optimizer.param_groups:
|
for param_group in optimizer.param_groups:
|
||||||
param_group['lr'] = lr_this_step
|
param_group['lr'] = lr_this_step
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
|
|
@ -34,7 +34,7 @@ from tqdm import tqdm, trange
|
||||||
|
|
||||||
from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
|
from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
|
||||||
from pytorch_pretrained_bert.modeling import BertForMultipleChoice, BertConfig
|
from pytorch_pretrained_bert.modeling import BertForMultipleChoice, BertConfig
|
||||||
from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
|
from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
|
||||||
from pytorch_pretrained_bert.tokenization import BertTokenizer
|
from pytorch_pretrained_bert.tokenization import BertTokenizer
|
||||||
|
|
||||||
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
|
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
|
||||||
|
@ -411,6 +411,8 @@ def main():
|
||||||
optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
|
optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
|
||||||
else:
|
else:
|
||||||
optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
|
optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
|
||||||
|
warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
|
||||||
|
t_total=num_train_optimization_steps)
|
||||||
else:
|
else:
|
||||||
optimizer = BertAdam(optimizer_grouped_parameters,
|
optimizer = BertAdam(optimizer_grouped_parameters,
|
||||||
lr=args.learning_rate,
|
lr=args.learning_rate,
|
||||||
|
@ -464,7 +466,8 @@ def main():
|
||||||
if args.fp16:
|
if args.fp16:
|
||||||
# modify learning rate with special warm up BERT uses
|
# modify learning rate with special warm up BERT uses
|
||||||
# if args.fp16 is False, BertAdam is used that handles this automatically
|
# if args.fp16 is False, BertAdam is used that handles this automatically
|
||||||
lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
|
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
|
||||||
|
args.warmup_proportion)
|
||||||
for param_group in optimizer.param_groups:
|
for param_group in optimizer.param_groups:
|
||||||
param_group['lr'] = lr_this_step
|
param_group['lr'] = lr_this_step
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
|
Loading…
Reference in New Issue