description:The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.
type:integer
default:128
# Training batch size per GPU/CPU (integer, default 8).
- name: per_gpu_train_batch_size
  # Quoted so the leading dashes and the {v} placeholder stay a literal string.
  pass-as: '--per_gpu_train_batch_size={v}'
  description: Batch size per GPU/CPU for training.
  type: integer
  default: 8
# Evaluation batch size per GPU/CPU (integer, default 8).
- name: per_gpu_eval_batch_size
  # Quoted so the leading dashes and the {v} placeholder stay a literal string.
  pass-as: '--per_gpu_eval_batch_size={v}'
  description: Batch size per GPU/CPU for evaluation.
  type: integer
  default: 8
# Number of steps to accumulate before a backward/update pass (integer).
# The default of 1 means a single step per update pass.
- name: gradient_accumulation_steps
  # Quoted so the leading dashes and the {v} placeholder stay a literal string.
  pass-as: '--gradient_accumulation_steps={v}'
  description: Number of updates steps to accumulate before performing a backward/update pass.
  type: integer
  default: 1
# Initial learning rate for Adam (float).
- name: learning_rate
  # Quoted so the leading dashes and the {v} placeholder stay a literal string.
  pass-as: '--learning_rate={v}'
  description: The initial learning rate for Adam.
  type: float
  # 5e-5, kept in plain decimal form: some YAML loaders read exponent
  # notation without a decimal point as a string rather than a float.
  default: 0.00005
# Epsilon term for the Adam optimizer (float).
- name: adam_epsilon
  # Quoted so the leading dashes and the {v} placeholder stay a literal string.
  pass-as: '--adam_epsilon={v}'
  description: Epsilon for Adam optimizer.
  type: float
  # 1e-8, kept in plain decimal form: some YAML loaders read exponent
  # notation without a decimal point as a string rather than a float.
  default: 0.00000001
# Maximum gradient norm (float, default 1.0).
- name: max_grad_norm
  # Quoted so the leading dashes and the {v} placeholder stay a literal string.
  pass-as: '--max_grad_norm={v}'
  description: Max gradient norm.
  type: float
  default: 1.0
# Total number of training epochs (integer, default 3).
- name: num_train_epochs
  # Quoted so the leading dashes and the {v} placeholder stay a literal string.
  pass-as: '--num_train_epochs={v}'
  description: Total number of training epochs to perform.
  type: integer
  default: 3
# Optional cap on the total number of training steps (integer).
- name: max_steps
  # Quoted so the leading dashes and the {v} placeholder stay a literal string.
  pass-as: '--max_steps={v}'
  # Quoted because the text contains '>' and ',' which are YAML indicators
  # in other positions; the string value itself is unchanged.
  description: 'If > 0, set total number of training steps to perform. Override num_train_epochs.'
  type: integer
  # Per the description only values > 0 take effect, so -1 leaves the cap off.
  default: -1
# Number of steps used for linear learning-rate warmup (integer).
- name: warmup_steps
  # Quoted so the leading dashes and the {v} placeholder stay a literal string.
  pass-as: '--warmup_steps={v}'
  description: Linear warmup over warmup_steps.
  type: integer
  # NOTE(review): a negative warmup length is unusual and matches the
  # max_steps default above; confirm whether 0 (no warmup) was intended.
  default: -1
# Logging interval, in update steps (integer, default 25).
- name: logging_steps
  # Quoted so the leading dashes and the {v} placeholder stay a literal string.
  pass-as: '--logging_steps={v}'
  description: Log every X updates steps.
  type: integer
  default: 25
- name:save_steps
pass-as:--save_steps={v}
description:Save checkpoint every X updates steps.