transformers/examples/research_projects/seq2seq-distillation/finetune_pegasus_xsum.sh

15 lines
500 B
Bash
Executable File

#!/usr/bin/env bash
export PYTHONPATH="../":"${PYTHONPATH}"
# From appendix C of paper https://arxiv.org/abs/1912.08777
# Set --gradient_accumulation_steps so that effective batch size is 256 (2*128, 4*64, 8*32, 16*16)
python finetune.py \
--learning_rate=1e-4 \
--do_train \
--do_predict \
--n_val 1000 \
--val_check_interval 0.25 \
--max_source_length 512 --max_target_length 56 \
--freeze_embeds --label_smoothing 0.1 --adafactor --task summarization_xsum \
"$@"