🚨🚨🚨 Deprecate `evaluation_strategy` in favor of `eval_strategy` 🚨🚨🚨 (#30190)
* Alias
* Note alias
* Tests and src
* Rest
* Clean
* Change typing?
* Fix tests
* Deprecation versions
This commit is contained in:
parent
c86d020ead
commit
60d5f8f9f0
|
@ -128,12 +128,12 @@ Rufen Sie [`~evaluate.compute`] auf `metric` auf, um die Genauigkeit Ihrer Vorhe
|
|||
... return metric.compute(predictions=predictions, references=labels)
|
||||
```
|
||||
|
||||
Wenn Sie Ihre Bewertungsmetriken während der Feinabstimmung überwachen möchten, geben Sie den Parameter `evaluation_strategy` in Ihren Trainingsargumenten an, um die Bewertungsmetrik am Ende jeder Epoche zu ermitteln:
|
||||
Wenn Sie Ihre Bewertungsmetriken während der Feinabstimmung überwachen möchten, geben Sie den Parameter `eval_strategy` in Ihren Trainingsargumenten an, um die Bewertungsmetrik am Ende jeder Epoche zu ermitteln:
|
||||
|
||||
```py
|
||||
>>> from transformers import TrainingArguments, Trainer
|
||||
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")
|
||||
```
|
||||
|
||||
### Trainer
|
||||
|
|
|
@ -145,7 +145,7 @@ arguments:
|
|||
```py
|
||||
default_args = {
|
||||
"output_dir": "tmp",
|
||||
"evaluation_strategy": "steps",
|
||||
"eval_strategy": "steps",
|
||||
"num_train_epochs": 1,
|
||||
"log_level": "error",
|
||||
"report_to": "none",
|
||||
|
|
|
@ -270,7 +270,7 @@ At this point, only three steps remain:
|
|||
... gradient_checkpointing=True,
|
||||
... fp16=True,
|
||||
... group_by_length=True,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... per_device_eval_batch_size=8,
|
||||
... save_steps=1000,
|
||||
... eval_steps=1000,
|
||||
|
|
|
@ -221,7 +221,7 @@ At this point, only three steps remain:
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_mind_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... learning_rate=3e-5,
|
||||
... per_device_train_batch_size=32,
|
||||
|
|
|
@ -399,7 +399,7 @@ In this case the `output_dir` will also be the name of the repo where your model
|
|||
... num_train_epochs=20,
|
||||
... save_steps=200,
|
||||
... logging_steps=50,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... learning_rate=5e-5,
|
||||
... save_total_limit=2,
|
||||
... remove_unused_columns=False,
|
||||
|
|
|
@ -196,7 +196,7 @@ training_args = TrainingArguments(
|
|||
per_device_eval_batch_size=32,
|
||||
gradient_accumulation_steps=2,
|
||||
save_total_limit=3,
|
||||
evaluation_strategy="steps",
|
||||
eval_strategy="steps",
|
||||
eval_steps=50,
|
||||
save_strategy="steps",
|
||||
save_steps=50,
|
||||
|
|
|
@ -302,7 +302,7 @@ At this point, only three steps remain:
|
|||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_food_model",
|
||||
... remove_unused_columns=False,
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... learning_rate=5e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
|
|
|
@ -112,7 +112,7 @@ training_args = TrainingArguments(
|
|||
fp16=True,
|
||||
logging_dir=f"{repo_name}/logs",
|
||||
logging_strategy="epoch",
|
||||
evaluation_strategy="epoch",
|
||||
eval_strategy="epoch",
|
||||
save_strategy="epoch",
|
||||
load_best_model_at_end=True,
|
||||
metric_for_best_model="accuracy",
|
||||
|
|
|
@ -249,7 +249,7 @@ At this point, only three steps remain:
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_eli5_clm-model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... weight_decay=0.01,
|
||||
... push_to_hub=True,
|
||||
|
|
|
@ -238,7 +238,7 @@ At this point, only three steps remain:
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_eli5_mlm_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... num_train_epochs=3,
|
||||
... weight_decay=0.01,
|
||||
|
|
|
@ -265,7 +265,7 @@ At this point, only three steps remain:
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_swag_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... load_best_model_at_end=True,
|
||||
... learning_rate=5e-5,
|
||||
|
|
|
@ -218,7 +218,7 @@ At this point, only three steps remain:
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_qa_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
... per_device_eval_batch_size=16,
|
||||
|
|
|
@ -535,7 +535,7 @@ At this point, only three steps remain:
|
|||
... per_device_train_batch_size=2,
|
||||
... per_device_eval_batch_size=2,
|
||||
... save_total_limit=3,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... save_strategy="steps",
|
||||
... save_steps=20,
|
||||
... eval_steps=20,
|
||||
|
|
|
@ -187,7 +187,7 @@ At this point, only three steps remain:
|
|||
... per_device_eval_batch_size=16,
|
||||
... num_train_epochs=2,
|
||||
... weight_decay=0.01,
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... load_best_model_at_end=True,
|
||||
... push_to_hub=True,
|
||||
|
|
|
@ -202,7 +202,7 @@ At this point, only three steps remain:
|
|||
```py
|
||||
>>> training_args = Seq2SeqTrainingArguments(
|
||||
... output_dir="my_awesome_billsum_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
... per_device_eval_batch_size=16,
|
||||
|
|
|
@ -477,7 +477,7 @@ only look at the loss:
|
|||
... max_steps=4000,
|
||||
... gradient_checkpointing=True,
|
||||
... fp16=True,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... per_device_eval_batch_size=2,
|
||||
... save_steps=1000,
|
||||
... eval_steps=1000,
|
||||
|
|
|
@ -290,7 +290,7 @@ At this point, only three steps remain:
|
|||
... per_device_eval_batch_size=16,
|
||||
... num_train_epochs=2,
|
||||
... weight_decay=0.01,
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... load_best_model_at_end=True,
|
||||
... push_to_hub=True,
|
||||
|
|
|
@ -209,7 +209,7 @@ At this point, only three steps remain:
|
|||
```py
|
||||
>>> training_args = Seq2SeqTrainingArguments(
|
||||
... output_dir="my_awesome_opus_books_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
... per_device_eval_batch_size=16,
|
||||
|
|
|
@ -354,7 +354,7 @@ Most of the training arguments are self-explanatory, but one that is quite impor
|
|||
>>> args = TrainingArguments(
|
||||
... new_model_name,
|
||||
... remove_unused_columns=False,
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... learning_rate=5e-5,
|
||||
... per_device_train_batch_size=batch_size,
|
||||
|
|
|
@ -62,7 +62,7 @@ training_args = TrainingArguments(
|
|||
per_device_eval_batch_size=16,
|
||||
num_train_epochs=2,
|
||||
weight_decay=0.01,
|
||||
evaluation_strategy="epoch",
|
||||
eval_strategy="epoch",
|
||||
save_strategy="epoch",
|
||||
load_best_model_at_end=True,
|
||||
push_to_hub=True,
|
||||
|
|
|
@ -128,12 +128,12 @@ Call [`~evaluate.compute`] on `metric` to calculate the accuracy of your predict
|
|||
... return metric.compute(predictions=predictions, references=labels)
|
||||
```
|
||||
|
||||
If you'd like to monitor your evaluation metrics during fine-tuning, specify the `evaluation_strategy` parameter in your training arguments to report the evaluation metric at the end of each epoch:
|
||||
If you'd like to monitor your evaluation metrics during fine-tuning, specify the `eval_strategy` parameter in your training arguments to report the evaluation metric at the end of each epoch:
|
||||
|
||||
```py
|
||||
>>> from transformers import TrainingArguments, Trainer
|
||||
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")
|
||||
```
|
||||
|
||||
### Trainer
|
||||
|
|
|
@ -260,7 +260,7 @@ En este punto, solo quedan tres pasos:
|
|||
... gradient_checkpointing=True,
|
||||
... fp16=True,
|
||||
... group_by_length=True,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... per_device_eval_batch_size=8,
|
||||
... save_steps=1000,
|
||||
... eval_steps=1000,
|
||||
|
|
|
@ -188,7 +188,7 @@ training_args = TrainingArguments(
|
|||
per_device_eval_batch_size=32,
|
||||
gradient_accumulation_steps=2,
|
||||
save_total_limit=3,
|
||||
evaluation_strategy="steps",
|
||||
eval_strategy="steps",
|
||||
eval_steps=50,
|
||||
save_strategy="steps",
|
||||
save_steps=50,
|
||||
|
|
|
@ -143,7 +143,7 @@ Al llegar a este punto, solo quedan tres pasos:
|
|||
>>> training_args = TrainingArguments(
|
||||
... output_dir="./results",
|
||||
... per_device_train_batch_size=16,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... num_train_epochs=4,
|
||||
... fp16=True,
|
||||
... save_steps=100,
|
||||
|
|
|
@ -232,7 +232,7 @@ A este punto, solo faltan tres pasos:
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="./results",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... weight_decay=0.01,
|
||||
... )
|
||||
|
@ -338,7 +338,7 @@ A este punto, solo faltan tres pasos:
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="./results",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... num_train_epochs=3,
|
||||
... weight_decay=0.01,
|
||||
|
|
|
@ -212,7 +212,7 @@ En este punto, solo quedan tres pasos:
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="./results",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=5e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
... per_device_eval_batch_size=16,
|
||||
|
|
|
@ -182,7 +182,7 @@ En este punto, solo quedan tres pasos:
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="./results",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
... per_device_eval_batch_size=16,
|
||||
|
|
|
@ -140,7 +140,7 @@ En este punto, solo faltan tres pasos:
|
|||
```py
|
||||
>>> training_args = Seq2SeqTrainingArguments(
|
||||
... output_dir="./results",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
... per_device_eval_batch_size=16,
|
||||
|
|
|
@ -60,7 +60,7 @@ training_args = TrainingArguments(
|
|||
per_device_eval_batch_size=16,
|
||||
num_train_epochs=2,
|
||||
weight_decay=0.01,
|
||||
evaluation_strategy="epoch",
|
||||
eval_strategy="epoch",
|
||||
save_strategy="epoch",
|
||||
load_best_model_at_end=True,
|
||||
push_to_hub=True,
|
||||
|
|
|
@ -120,12 +120,12 @@ Define la función `compute` en `metric` para calcular el accuracy de tus predic
|
|||
... return metric.compute(predictions=predictions, references=labels)
|
||||
```
|
||||
|
||||
Si quieres controlar tus métricas de evaluación durante el fine-tuning, especifica el parámetro `evaluation_strategy` en tus argumentos de entrenamiento para que el modelo tenga en cuenta la métrica de evaluación al final de cada época:
|
||||
Si quieres controlar tus métricas de evaluación durante el fine-tuning, especifica el parámetro `eval_strategy` en tus argumentos de entrenamiento para que el modelo tenga en cuenta la métrica de evaluación al final de cada época:
|
||||
|
||||
```py
|
||||
>>> from transformers import TrainingArguments
|
||||
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")
|
||||
```
|
||||
|
||||
### Trainer
|
||||
|
|
|
@ -167,7 +167,7 @@ Per quanto riguarda la classe `Trainer`:
|
|||
- Il metodo `is_world_master` di `Trainer` è deprecato a favore di `is_world_process_zero`.
|
||||
|
||||
Per quanto riguarda la classe `TrainingArguments`:
|
||||
- L'argomento `evaluate_during_training` di `TrainingArguments` è deprecato a favore di `evaluation_strategy`.
|
||||
- L'argomento `evaluate_during_training` di `TrainingArguments` è deprecato a favore di `eval_strategy`.
|
||||
|
||||
Per quanto riguarda il modello Transfo-XL:
|
||||
- L'attributo di configurazione `tie_weight` di Transfo-XL diventa `tie_words_embeddings`.
|
||||
|
|
|
@ -121,12 +121,12 @@ Richiama `compute` su `metric` per calcolare l'accuratezza delle tue previsioni.
|
|||
... return metric.compute(predictions=predictions, references=labels)
|
||||
```
|
||||
|
||||
Se preferisci monitorare le tue metriche di valutazione durante il fine-tuning, specifica il parametro `evaluation_strategy` nei tuoi training arguments per restituire le metriche di valutazione ad ogni epoca di addestramento:
|
||||
Se preferisci monitorare le tue metriche di valutazione durante il fine-tuning, specifica il parametro `eval_strategy` nei tuoi training arguments per restituire le metriche di valutazione ad ogni epoca di addestramento:
|
||||
|
||||
```py
|
||||
>>> from transformers import TrainingArguments, Trainer
|
||||
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")
|
||||
```
|
||||
|
||||
### Trainer
|
||||
|
|
|
@ -136,7 +136,7 @@ Tue Jan 11 08:58:05 2022
|
|||
```py
|
||||
default_args = {
|
||||
"output_dir": "tmp",
|
||||
"evaluation_strategy": "steps",
|
||||
"eval_strategy": "steps",
|
||||
"num_train_epochs": 1,
|
||||
"log_level": "error",
|
||||
"report_to": "none",
|
||||
|
|
|
@ -270,7 +270,7 @@ MInDS-14 データセットのサンプリング レートは 8000Hz です (
|
|||
... gradient_checkpointing=True,
|
||||
... fp16=True,
|
||||
... group_by_length=True,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... per_device_eval_batch_size=8,
|
||||
... save_steps=1000,
|
||||
... eval_steps=1000,
|
||||
|
|
|
@ -221,7 +221,7 @@ MInDS-14 データセットのサンプリング レートは 8000Hz です (
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_mind_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... learning_rate=3e-5,
|
||||
... per_device_train_batch_size=32,
|
||||
|
|
|
@ -403,7 +403,7 @@ end_index 18
|
|||
... num_train_epochs=20,
|
||||
... save_steps=200,
|
||||
... logging_steps=50,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... learning_rate=5e-5,
|
||||
... save_total_limit=2,
|
||||
... remove_unused_columns=False,
|
||||
|
|
|
@ -194,7 +194,7 @@ training_args = TrainingArguments(
|
|||
per_device_eval_batch_size=32,
|
||||
gradient_accumulation_steps=2,
|
||||
save_total_limit=3,
|
||||
evaluation_strategy="steps",
|
||||
eval_strategy="steps",
|
||||
eval_steps=50,
|
||||
save_strategy="steps",
|
||||
save_steps=50,
|
||||
|
|
|
@ -308,7 +308,7 @@ food["test"].set_transform(preprocess_val)
|
|||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_food_model",
|
||||
... remove_unused_columns=False,
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... learning_rate=5e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
|
|
|
@ -112,7 +112,7 @@ training_args = TrainingArguments(
|
|||
fp16=True,
|
||||
logging_dir=f"{repo_name}/logs",
|
||||
logging_strategy="epoch",
|
||||
evaluation_strategy="epoch",
|
||||
eval_strategy="epoch",
|
||||
save_strategy="epoch",
|
||||
load_best_model_at_end=True,
|
||||
metric_for_best_model="accuracy",
|
||||
|
|
|
@ -246,7 +246,7 @@ Apply the `group_texts` function over the entire dataset:
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_eli5_clm-model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... weight_decay=0.01,
|
||||
... push_to_hub=True,
|
||||
|
|
|
@ -231,7 +231,7 @@ pip install transformers datasets evaluate
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_eli5_mlm_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... num_train_epochs=3,
|
||||
... weight_decay=0.01,
|
||||
|
|
|
@ -266,7 +266,7 @@ tokenized_swag = swag.map(preprocess_function, batched=True)
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_swag_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... load_best_model_at_end=True,
|
||||
... learning_rate=5e-5,
|
||||
|
|
|
@ -220,7 +220,7 @@ pip install transformers datasets evaluate
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_qa_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
... per_device_eval_batch_size=16,
|
||||
|
|
|
@ -323,7 +323,7 @@ pip install -q datasets transformers evaluate
|
|||
... per_device_train_batch_size=2,
|
||||
... per_device_eval_batch_size=2,
|
||||
... save_total_limit=3,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... save_strategy="steps",
|
||||
... save_steps=20,
|
||||
... eval_steps=20,
|
||||
|
|
|
@ -324,7 +324,7 @@ pip install -q datasets transformers evaluate
|
|||
... per_device_train_batch_size=2,
|
||||
... per_device_eval_batch_size=2,
|
||||
... save_total_limit=3,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... save_strategy="steps",
|
||||
... save_steps=20,
|
||||
... eval_steps=20,
|
||||
|
|
|
@ -204,7 +204,7 @@ pip install transformers datasets evaluate rouge_score
|
|||
```py
|
||||
>>> training_args = Seq2SeqTrainingArguments(
|
||||
... output_dir="my_awesome_billsum_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
... per_device_eval_batch_size=16,
|
||||
|
|
|
@ -477,7 +477,7 @@ SpeechT5 では、モデルのデコーダ部分への入力が 2 分の 1 に
|
|||
... max_steps=4000,
|
||||
... gradient_checkpointing=True,
|
||||
... fp16=True,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... per_device_eval_batch_size=2,
|
||||
... save_steps=1000,
|
||||
... eval_steps=1000,
|
||||
|
|
|
@ -288,7 +288,7 @@ pip install transformers datasets evaluate seqeval
|
|||
... per_device_eval_batch_size=16,
|
||||
... num_train_epochs=2,
|
||||
... weight_decay=0.01,
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... load_best_model_at_end=True,
|
||||
... push_to_hub=True,
|
||||
|
|
|
@ -208,7 +208,7 @@ pip install transformers datasets evaluate sacrebleu
|
|||
```py
|
||||
>>> training_args = Seq2SeqTrainingArguments(
|
||||
... output_dir="my_awesome_opus_books_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
... per_device_eval_batch_size=16,
|
||||
|
|
|
@ -360,7 +360,7 @@ You should probably TRAIN this model on a down-stream task to be able to use it
|
|||
>>> args = TrainingArguments(
|
||||
... new_model_name,
|
||||
... remove_unused_columns=False,
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... learning_rate=5e-5,
|
||||
... per_device_train_batch_size=batch_size,
|
||||
|
|
|
@ -135,12 +135,12 @@ BERTモデルの事前学習済みのヘッドは破棄され、ランダムに
|
|||
... return metric.compute(predictions=predictions, references=labels)
|
||||
```
|
||||
|
||||
評価メトリクスをファインチューニング中に監視したい場合、トレーニング引数で `evaluation_strategy` パラメータを指定して、各エポックの終了時に評価メトリクスを報告します:
|
||||
評価メトリクスをファインチューニング中に監視したい場合、トレーニング引数で `eval_strategy` パラメータを指定して、各エポックの終了時に評価メトリクスを報告します:
|
||||
|
||||
```python
|
||||
>>> from transformers import TrainingArguments, Trainer
|
||||
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")
|
||||
```
|
||||
|
||||
### Trainer
|
||||
|
|
|
@ -132,7 +132,7 @@ Tue Jan 11 08:58:05 2022
|
|||
```py
|
||||
default_args = {
|
||||
"output_dir": "tmp",
|
||||
"evaluation_strategy": "steps",
|
||||
"eval_strategy": "steps",
|
||||
"num_train_epochs": 1,
|
||||
"log_level": "error",
|
||||
"report_to": "none",
|
||||
|
|
|
@ -274,7 +274,7 @@ MInDS-14 데이터 세트의 샘플링 레이트는 8000Hz이므로([데이터
|
|||
... gradient_checkpointing=True,
|
||||
... fp16=True,
|
||||
... group_by_length=True,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... per_device_eval_batch_size=8,
|
||||
... save_steps=1000,
|
||||
... eval_steps=1000,
|
||||
|
|
|
@ -221,7 +221,7 @@ MInDS-14 데이터 세트의 샘플링 속도는 8000Hz이므로(이 정보는
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_mind_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... learning_rate=3e-5,
|
||||
... per_device_train_batch_size=32,
|
||||
|
|
|
@ -385,7 +385,7 @@ end_index 18
|
|||
... num_train_epochs=20,
|
||||
... save_steps=200,
|
||||
... logging_steps=50,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... learning_rate=5e-5,
|
||||
... save_total_limit=2,
|
||||
... remove_unused_columns=False,
|
||||
|
|
|
@ -201,7 +201,7 @@ training_args = TrainingArguments(
|
|||
per_device_eval_batch_size=32,
|
||||
gradient_accumulation_steps=2,
|
||||
save_total_limit=3,
|
||||
evaluation_strategy="steps",
|
||||
eval_strategy="steps",
|
||||
eval_steps=50,
|
||||
save_strategy="steps",
|
||||
save_steps=50,
|
||||
|
|
|
@ -301,7 +301,7 @@ food["test"].set_transform(preprocess_val)
|
|||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_food_model",
|
||||
... remove_unused_columns=False,
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... learning_rate=5e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
|
|
|
@ -233,7 +233,7 @@ pip install transformers datasets evaluate
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_eli5_clm-model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... weight_decay=0.01,
|
||||
... push_to_hub=True,
|
||||
|
|
|
@ -236,7 +236,7 @@ Hugging Face 계정에 로그인하여 모델을 업로드하고 커뮤니티와
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_eli5_mlm_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... num_train_epochs=3,
|
||||
... weight_decay=0.01,
|
||||
|
|
|
@ -265,7 +265,7 @@ tokenized_swag = swag.map(preprocess_function, batched=True)
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_swag_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... load_best_model_at_end=True,
|
||||
... learning_rate=5e-5,
|
||||
|
|
|
@ -215,7 +215,7 @@ pip install transformers datasets evaluate
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="my_awesome_qa_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
... per_device_eval_batch_size=16,
|
||||
|
|
|
@ -317,7 +317,7 @@ pip install -q datasets transformers evaluate
|
|||
... per_device_train_batch_size=2,
|
||||
... per_device_eval_batch_size=2,
|
||||
... save_total_limit=3,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... save_strategy="steps",
|
||||
... save_steps=20,
|
||||
... eval_steps=20,
|
||||
|
|
|
@ -185,7 +185,7 @@ tokenized_imdb = imdb.map(preprocess_function, batched=True)
|
|||
... per_device_eval_batch_size=16,
|
||||
... num_train_epochs=2,
|
||||
... weight_decay=0.01,
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... load_best_model_at_end=True,
|
||||
... push_to_hub=True,
|
||||
|
|
|
@ -211,7 +211,7 @@ Hugging Face 계정에 로그인하면 모델을 업로드하고 커뮤니티에
|
|||
```py
|
||||
>>> training_args = Seq2SeqTrainingArguments(
|
||||
... output_dir="my_awesome_billsum_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
... per_device_eval_batch_size=16,
|
||||
|
|
|
@ -288,7 +288,7 @@ Hugging Face 계정에 로그인하여 모델을 업로드하고 커뮤니티에
|
|||
... per_device_eval_batch_size=16,
|
||||
... num_train_epochs=2,
|
||||
... weight_decay=0.01,
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... load_best_model_at_end=True,
|
||||
... push_to_hub=True,
|
||||
|
|
|
@ -209,7 +209,7 @@ pip install transformers datasets evaluate sacrebleu
|
|||
```py
|
||||
>>> training_args = Seq2SeqTrainingArguments(
|
||||
... output_dir="my_awesome_opus_books_model",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
... per_device_eval_batch_size=16,
|
||||
|
|
|
@ -358,7 +358,7 @@ You should probably TRAIN this model on a down-stream task to be able to use it
|
|||
>>> args = TrainingArguments(
|
||||
... new_model_name,
|
||||
... remove_unused_columns=False,
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... save_strategy="epoch",
|
||||
... learning_rate=5e-5,
|
||||
... per_device_train_batch_size=batch_size,
|
||||
|
|
|
@ -129,12 +129,12 @@ rendered properly in your Markdown viewer.
|
|||
... return metric.compute(predictions=predictions, references=labels)
|
||||
```
|
||||
|
||||
미세 튜닝 중에 평가 지표를 모니터링하려면 훈련 인수에 `evaluation_strategy` 파라미터를 지정하여 각 에폭이 끝날 때 평가 지표를 확인할 수 있습니다:
|
||||
미세 튜닝 중에 평가 지표를 모니터링하려면 훈련 인수에 `eval_strategy` 파라미터를 지정하여 각 에폭이 끝날 때 평가 지표를 확인할 수 있습니다:
|
||||
|
||||
```py
|
||||
>>> from transformers import TrainingArguments, Trainer
|
||||
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")
|
||||
```
|
||||
|
||||
### 훈련 하기[[trainer]]
|
||||
|
|
|
@ -180,7 +180,7 @@ Nesse ponto, restam apenas três passos:
|
|||
```py
|
||||
>>> training_args = TrainingArguments(
|
||||
... output_dir="./results",
|
||||
... evaluation_strategy="epoch",
|
||||
... eval_strategy="epoch",
|
||||
... learning_rate=2e-5,
|
||||
... per_device_train_batch_size=16,
|
||||
... per_device_eval_batch_size=16,
|
||||
|
|
|
@ -146,13 +146,13 @@ todos os modelos de 🤗 Transformers retornam logits).
|
|||
... return metric.compute(predictions=predictions, references=labels)
|
||||
```
|
||||
|
||||
Se quiser controlar as suas métricas de avaliação durante o fine-tuning, especifique o parâmetro `evaluation_strategy`
|
||||
Se quiser controlar as suas métricas de avaliação durante o fine-tuning, especifique o parâmetro `eval_strategy`
|
||||
nos seus argumentos de treinamento para que o modelo considere a métrica de avaliação ao final de cada época:
|
||||
|
||||
```py
|
||||
>>> from transformers import TrainingArguments
|
||||
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")
|
||||
```
|
||||
|
||||
### Trainer
|
||||
|
|
|
@ -288,7 +288,7 @@ Wav2Vec2 分词器仅训练了大写字符,因此您需要确保文本与分
|
|||
... gradient_checkpointing=True,
|
||||
... fp16=True,
|
||||
... group_by_length=True,
|
||||
... evaluation_strategy="steps",
|
||||
... eval_strategy="steps",
|
||||
... per_device_eval_batch_size=8,
|
||||
... save_steps=1000,
|
||||
... eval_steps=1000,
|
||||
|
|
|
@ -125,12 +125,12 @@ rendered properly in your Markdown viewer.
|
|||
... return metric.compute(predictions=predictions, references=labels)
|
||||
```
|
||||
|
||||
如果您希望在微调过程中监视评估指标,请在您的训练参数中指定 `evaluation_strategy` 参数,以在每个`epoch`结束时展示评估指标:
|
||||
如果您希望在微调过程中监视评估指标,请在您的训练参数中指定 `eval_strategy` 参数,以在每个`epoch`结束时展示评估指标:
|
||||
|
||||
```py
|
||||
>>> from transformers import TrainingArguments, Trainer
|
||||
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")
|
||||
>>> training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")
|
||||
```
|
||||
|
||||
### 训练器
|
||||
|
|
|
@ -490,7 +490,7 @@ python3 xla_spawn.py --num_cores ${NUM_TPUS} run_mlm.py --output_dir="./runs" \
|
|||
--do_train \
|
||||
--do_eval \
|
||||
--logging_steps="500" \
|
||||
--evaluation_strategy="epoch" \
|
||||
--eval_strategy="epoch" \
|
||||
--report_to="tensorboard" \
|
||||
--save_strategy="no"
|
||||
```
|
||||
|
@ -538,7 +538,7 @@ python3 -m torch.distributed.launch --nproc_per_node ${NUM_GPUS} run_mlm.py \
|
|||
--do_train \
|
||||
--do_eval \
|
||||
--logging_steps="500" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--report_to="tensorboard" \
|
||||
--save_strategy="no"
|
||||
```
|
||||
|
|
|
@ -18,7 +18,7 @@ python finetune_trainer.py \
|
|||
--learning_rate=3e-5 \
|
||||
--fp16 \
|
||||
--do_train --do_eval --do_predict \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--predict_with_generate \
|
||||
--n_val 1000 \
|
||||
"$@"
|
||||
|
|
|
@ -20,7 +20,7 @@ python xla_spawn.py --num_cores $TPU_NUM_CORES \
|
|||
finetune_trainer.py \
|
||||
--learning_rate=3e-5 \
|
||||
--do_train --do_eval \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--prediction_loss_only \
|
||||
--n_val 1000 \
|
||||
"$@"
|
||||
|
|
|
@ -271,7 +271,7 @@ def main():
|
|||
max_source_length=data_args.max_source_length,
|
||||
prefix=model.config.prefix or "",
|
||||
)
|
||||
if training_args.do_eval or training_args.evaluation_strategy != EvaluationStrategy.NO
|
||||
if training_args.do_eval or training_args.eval_strategy != EvaluationStrategy.NO
|
||||
else None
|
||||
)
|
||||
test_dataset = (
|
||||
|
|
|
@ -32,7 +32,7 @@ python finetune_trainer.py \
|
|||
--max_source_length $MAX_LEN --max_target_length $MAX_LEN \
|
||||
--val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \
|
||||
--do_train --do_eval --do_predict \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--predict_with_generate --logging_first_step \
|
||||
--task translation --label_smoothing_factor 0.1 \
|
||||
"$@"
|
||||
|
|
|
@ -33,7 +33,7 @@ python xla_spawn.py --num_cores $TPU_NUM_CORES \
|
|||
--max_source_length $MAX_LEN --max_target_length $MAX_LEN \
|
||||
--val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN \
|
||||
--do_train --do_eval \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--prediction_loss_only \
|
||||
--task translation --label_smoothing_factor 0.1 \
|
||||
"$@"
|
||||
|
|
|
@ -34,6 +34,6 @@ python finetune_trainer.py \
|
|||
--logging_first_step \
|
||||
--max_target_length 56 --val_max_target_length $MAX_TGT_LEN --test_max_target_length $MAX_TGT_LEN\
|
||||
--do_train --do_eval --do_predict \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--predict_with_generate --sortish_sampler \
|
||||
"$@"
|
||||
|
|
|
@ -29,7 +29,7 @@ python finetune_trainer.py \
|
|||
--num_train_epochs 6 \
|
||||
--save_steps 25000 --eval_steps 25000 --logging_steps 1000 \
|
||||
--do_train --do_eval --do_predict \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--predict_with_generate --logging_first_step \
|
||||
--task translation \
|
||||
"$@"
|
||||
|
|
|
@ -283,7 +283,7 @@ To enable Neptune logging, in your `TrainingArguments`, set the `report_to` argu
|
|||
```python
|
||||
training_args = TrainingArguments(
|
||||
"quick-training-distilbert-mrpc",
|
||||
evaluation_strategy="steps",
|
||||
eval_strategy="steps",
|
||||
eval_steps=20,
|
||||
report_to="neptune",
|
||||
)
|
||||
|
|
|
@ -50,7 +50,7 @@ python run_audio_classification.py \
|
|||
--dataloader_num_workers 4 \
|
||||
--logging_strategy steps \
|
||||
--logging_steps 10 \
|
||||
--evaluation_strategy epoch \
|
||||
--eval_strategy epoch \
|
||||
--save_strategy epoch \
|
||||
--load_best_model_at_end True \
|
||||
--metric_for_best_model accuracy \
|
||||
|
@ -92,7 +92,7 @@ python run_audio_classification.py \
|
|||
--dataloader_num_workers 8 \
|
||||
--logging_strategy steps \
|
||||
--logging_steps 10 \
|
||||
--evaluation_strategy epoch \
|
||||
--eval_strategy epoch \
|
||||
--save_strategy epoch \
|
||||
--load_best_model_at_end True \
|
||||
--metric_for_best_model accuracy \
|
||||
|
|
|
@ -52,7 +52,7 @@ python run_image_classification.py \
|
|||
--per_device_eval_batch_size 8 \
|
||||
--logging_strategy steps \
|
||||
--logging_steps 10 \
|
||||
--evaluation_strategy epoch \
|
||||
--eval_strategy epoch \
|
||||
--save_strategy epoch \
|
||||
--load_best_model_at_end True \
|
||||
--save_total_limit 3 \
|
||||
|
|
|
@ -56,7 +56,7 @@ Alternatively, one can decide to further pre-train an already pre-trained (or fi
|
|||
--per_device_eval_batch_size 8 \
|
||||
--logging_strategy steps \
|
||||
--logging_steps 10 \
|
||||
--evaluation_strategy epoch \
|
||||
--eval_strategy epoch \
|
||||
--save_strategy epoch \
|
||||
--load_best_model_at_end True \
|
||||
--save_total_limit 3 \
|
||||
|
@ -106,7 +106,7 @@ Next, we can run the script by providing the path to this custom configuration (
|
|||
--per_device_eval_batch_size 8 \
|
||||
--logging_strategy steps \
|
||||
--logging_steps 10 \
|
||||
--evaluation_strategy epoch \
|
||||
--eval_strategy epoch \
|
||||
--save_strategy epoch \
|
||||
--load_best_model_at_end True \
|
||||
--save_total_limit 3 \
|
||||
|
@ -172,7 +172,7 @@ python run_mae.py \
|
|||
--per_device_eval_batch_size 8 \
|
||||
--logging_strategy steps \
|
||||
--logging_steps 10 \
|
||||
--evaluation_strategy epoch \
|
||||
--eval_strategy epoch \
|
||||
--save_strategy epoch \
|
||||
--load_best_model_at_end True \
|
||||
--save_total_limit 3 \
|
||||
|
|
|
@ -118,7 +118,7 @@ python run_semantic_segmentation.py \
|
|||
--per_device_eval_batch_size 8 \
|
||||
--logging_strategy steps \
|
||||
--logging_steps 100 \
|
||||
--evaluation_strategy epoch \
|
||||
--eval_strategy epoch \
|
||||
--save_strategy epoch \
|
||||
--seed 1337
|
||||
```
|
||||
|
|
|
@ -76,7 +76,7 @@ python run_speech_recognition_ctc.py \
|
|||
--gradient_accumulation_steps="2" \
|
||||
--learning_rate="3e-4" \
|
||||
--warmup_steps="500" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--text_column_name="sentence" \
|
||||
--length_column_name="input_length" \
|
||||
--save_steps="400" \
|
||||
|
@ -111,7 +111,7 @@ torchrun \
|
|||
--per_device_train_batch_size="4" \
|
||||
--learning_rate="3e-4" \
|
||||
--warmup_steps="500" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--text_column_name="sentence" \
|
||||
--length_column_name="input_length" \
|
||||
--save_steps="400" \
|
||||
|
@ -162,7 +162,7 @@ However, the `--shuffle_buffer_size` argument controls how many examples we can
|
|||
--gradient_accumulation_steps="2" \
|
||||
--learning_rate="5e-4" \
|
||||
--warmup_steps="500" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--text_column_name="sentence" \
|
||||
--save_steps="500" \
|
||||
--eval_steps="500" \
|
||||
|
@ -293,7 +293,7 @@ python run_speech_recognition_ctc.py \
|
|||
--per_device_train_batch_size="32" \
|
||||
--learning_rate="1e-3" \
|
||||
--warmup_steps="100" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--text_column_name="sentence" \
|
||||
--length_column_name="input_length" \
|
||||
--save_steps="200" \
|
||||
|
@ -330,7 +330,7 @@ python run_speech_recognition_ctc.py \
|
|||
--per_device_train_batch_size="32" \
|
||||
--learning_rate="1e-3" \
|
||||
--warmup_steps="100" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--text_column_name="sentence" \
|
||||
--length_column_name="input_length" \
|
||||
--save_steps="200" \
|
||||
|
@ -378,7 +378,7 @@ python run_speech_recognition_seq2seq.py \
|
|||
--logging_steps="25" \
|
||||
--learning_rate="1e-5" \
|
||||
--warmup_steps="500" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--eval_steps="1000" \
|
||||
--save_strategy="steps" \
|
||||
--save_steps="1000" \
|
||||
|
@ -419,7 +419,7 @@ torchrun \
|
|||
--logging_steps="25" \
|
||||
--learning_rate="1e-5" \
|
||||
--warmup_steps="500" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--eval_steps="1000" \
|
||||
--save_strategy="steps" \
|
||||
--save_steps="1000" \
|
||||
|
@ -547,7 +547,7 @@ python run_speech_recognition_seq2seq.py \
|
|||
--gradient_accumulation_steps="8" \
|
||||
--learning_rate="3e-4" \
|
||||
--warmup_steps="400" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--text_column_name="text" \
|
||||
--save_steps="400" \
|
||||
--eval_steps="400" \
|
||||
|
@ -589,7 +589,7 @@ torchrun \
|
|||
--gradient_accumulation_steps="1" \
|
||||
--learning_rate="3e-4" \
|
||||
--warmup_steps="400" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--text_column_name="text" \
|
||||
--save_steps="400" \
|
||||
--eval_steps="400" \
|
||||
|
|
|
@ -100,7 +100,7 @@ def main():
|
|||
output_dir=args.output_dir,
|
||||
learning_rate=args.learning_rate,
|
||||
lr_scheduler_type=args.lr_scheduler_type,
|
||||
evaluation_strategy="epoch",
|
||||
eval_strategy="epoch",
|
||||
save_strategy="epoch",
|
||||
logging_strategy="epoch",
|
||||
per_device_train_batch_size=args.batch_size,
|
||||
|
|
|
@ -32,7 +32,7 @@ python run_funsd_cord.py \
|
|||
--do_train \
|
||||
--do_eval \
|
||||
--max_steps 1000 \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--eval_steps 100 \
|
||||
--learning_rate 1e-5 \
|
||||
--load_best_model_at_end \
|
||||
|
@ -57,7 +57,7 @@ python run_funsd_cord.py \
|
|||
--do_train \
|
||||
--do_eval \
|
||||
--max_steps 1000 \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--eval_steps 100 \
|
||||
--learning_rate 5e-5 \
|
||||
--load_best_model_at_end \
|
||||
|
|
|
@ -362,7 +362,7 @@ echo '''python run_speech_recognition_ctc.py \
|
|||
--per_device_train_batch_size="2" \
|
||||
--learning_rate="3e-4" \
|
||||
--save_total_limit="1" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--text_column_name="sentence" \
|
||||
--length_column_name="input_length" \
|
||||
--save_steps="5" \
|
||||
|
@ -438,7 +438,7 @@ echo '''python run_speech_recognition_ctc.py \
|
|||
--learning_rate="7.5e-5" \
|
||||
--warmup_steps="2000" \
|
||||
--length_column_name="input_length" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--text_column_name="sentence" \
|
||||
--chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” <20> — ’ … – \
|
||||
--save_steps="500" \
|
||||
|
|
|
@ -51,7 +51,7 @@ parameters_dict = {
|
|||
'train_file': os.path.join(data_dir, 'train.csv'),
|
||||
'infer_file': os.path.join(data_dir, 'infer.csv'),
|
||||
'eval_file': os.path.join(data_dir, 'eval.csv'),
|
||||
'evaluation_strategy': 'steps',
|
||||
'eval_strategy': 'steps',
|
||||
'task_name': 'scitail',
|
||||
'label_list': ['entails', 'neutral'],
|
||||
'per_device_train_batch_size': 32,
|
||||
|
|
|
@ -190,7 +190,7 @@ class FTTrainingArguments:
|
|||
)
|
||||
},
|
||||
)
|
||||
evaluation_strategy: Optional[str] = dataclasses.field(
|
||||
eval_strategy: Optional[str] = dataclasses.field(
|
||||
default="no",
|
||||
metadata={
|
||||
"help": 'The evaluation strategy to adopt during training. Possible values are: ["no", "step", "epoch]'
|
||||
|
@ -198,7 +198,7 @@ class FTTrainingArguments:
|
|||
)
|
||||
eval_steps: Optional[int] = dataclasses.field(
|
||||
default=1,
|
||||
metadata={"help": 'Number of update steps between two evaluations if `evaluation_strategy="steps"`.'},
|
||||
metadata={"help": 'Number of update steps between two evaluations if `eval_strategy="steps"`.'},
|
||||
)
|
||||
eval_metric: Optional[str] = dataclasses.field(
|
||||
default="accuracy", metadata={"help": "The evaluation metric used for the task."}
|
||||
|
@ -265,7 +265,7 @@ def train(args, accelerator, model, tokenizer, train_dataloader, optimizer, lr_s
|
|||
# Evaluate during training
|
||||
if (
|
||||
eval_dataloader is not None
|
||||
and args.evaluation_strategy == IntervalStrategy.STEPS.value
|
||||
and args.eval_strategy == IntervalStrategy.STEPS.value
|
||||
and args.eval_steps > 0
|
||||
and completed_steps % args.eval_steps == 0
|
||||
):
|
||||
|
@ -331,7 +331,7 @@ def train(args, accelerator, model, tokenizer, train_dataloader, optimizer, lr_s
|
|||
break
|
||||
|
||||
# Evaluate during training
|
||||
if eval_dataloader is not None and args.evaluation_strategy == IntervalStrategy.EPOCH.value:
|
||||
if eval_dataloader is not None and args.eval_strategy == IntervalStrategy.EPOCH.value:
|
||||
accelerator.wait_for_everyone()
|
||||
new_checkpoint = f"checkpoint-{IntervalStrategy.EPOCH.value}-{epoch}"
|
||||
new_eval_result = evaluate(args, accelerator, eval_dataloader, "eval", model, new_checkpoint)[
|
||||
|
@ -571,7 +571,7 @@ def finetune(accelerator, model_name_or_path, train_file, output_dir, **kwargs):
|
|||
assert args.train_file is not None
|
||||
data_files[Split.TRAIN.value] = args.train_file
|
||||
|
||||
if args.do_eval or args.evaluation_strategy != IntervalStrategy.NO.value:
|
||||
if args.do_eval or args.eval_strategy != IntervalStrategy.NO.value:
|
||||
assert args.eval_file is not None
|
||||
data_files[Split.EVAL.value] = args.eval_file
|
||||
|
||||
|
|
|
@ -60,7 +60,7 @@ parameters_dict = {
|
|||
'train_file': os.path.join(data_dir, '${TRAIN_FILE}'),
|
||||
'infer_file': os.path.join(data_dir, '${INFER_FILE}'),
|
||||
'eval_file': os.path.join(data_dir, '${EVAL_FILE}'),
|
||||
'evaluation_strategy': 'steps',
|
||||
'eval_strategy': 'steps',
|
||||
'task_name': 'scitail',
|
||||
'label_list': ['entails', 'neutral'],
|
||||
'per_device_train_batch_size': 32,
|
||||
|
|
|
@ -79,7 +79,7 @@ class STTrainingArguments:
|
|||
eval_metric: Optional[str] = dataclasses.field(
|
||||
default="accuracy", metadata={"help": "The evaluation metric used for the task."}
|
||||
)
|
||||
evaluation_strategy: Optional[str] = dataclasses.field(
|
||||
eval_strategy: Optional[str] = dataclasses.field(
|
||||
default="no",
|
||||
metadata={
|
||||
"help": 'The evaluation strategy to adopt during training. Possible values are: ["no", "step", "epoch]'
|
||||
|
@ -208,7 +208,7 @@ def selftrain(model_name_or_path, train_file, infer_file, output_dir, **kwargs):
|
|||
data_files["train"] = args.train_file
|
||||
data_files["infer"] = args.infer_file
|
||||
|
||||
if args.evaluation_strategy != IntervalStrategy.NO.value:
|
||||
if args.eval_strategy != IntervalStrategy.NO.value:
|
||||
assert args.eval_file is not None
|
||||
data_files["eval"] = args.eval_file
|
||||
|
||||
|
@ -267,7 +267,7 @@ def selftrain(model_name_or_path, train_file, infer_file, output_dir, **kwargs):
|
|||
"label_list": args.label_list,
|
||||
"output_dir": current_output_dir,
|
||||
"eval_metric": args.eval_metric,
|
||||
"evaluation_strategy": args.evaluation_strategy,
|
||||
"eval_strategy": args.eval_strategy,
|
||||
"early_stopping_patience": args.early_stopping_patience,
|
||||
"early_stopping_threshold": args.early_stopping_threshold,
|
||||
"seed": args.seed,
|
||||
|
@ -341,7 +341,7 @@ def selftrain(model_name_or_path, train_file, infer_file, output_dir, **kwargs):
|
|||
|
||||
data_files["train_pseudo"] = os.path.join(next_data_dir, f"train_pseudo.{args.data_file_extension}")
|
||||
|
||||
if args.evaluation_strategy != IntervalStrategy.NO.value:
|
||||
if args.eval_strategy != IntervalStrategy.NO.value:
|
||||
new_eval_result = eval_result
|
||||
|
||||
if best_iteration is None:
|
||||
|
|
|
@ -71,7 +71,7 @@ python run_wikisql_with_tapex.py \
|
|||
--eval_steps 1000 \
|
||||
--save_steps 1000 \
|
||||
--warmup_steps 1000 \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--predict_with_generate \
|
||||
--num_beams 5 \
|
||||
--weight_decay 1e-2 \
|
||||
|
@ -101,7 +101,7 @@ python run_wikisql_with_tapex.py \
|
|||
--eval_steps 1000 \
|
||||
--save_steps 1000 \
|
||||
--warmup_steps 1000 \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--predict_with_generate \
|
||||
--num_beams 5 \
|
||||
--weight_decay 1e-2 \
|
||||
|
@ -132,7 +132,7 @@ python run_wikitablequestions_with_tapex.py \
|
|||
--eval_steps 1000 \
|
||||
--save_steps 1000 \
|
||||
--warmup_steps 1000 \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--predict_with_generate \
|
||||
--num_beams 5 \
|
||||
--weight_decay 1e-2 \
|
||||
|
@ -162,7 +162,7 @@ python run_wikitablequestions_with_tapex.py \
|
|||
--eval_steps 1000 \
|
||||
--save_steps 1000 \
|
||||
--warmup_steps 1000 \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--predict_with_generate \
|
||||
--num_beams 5 \
|
||||
--weight_decay 1e-2 \
|
||||
|
@ -223,7 +223,7 @@ python run_tabfact_with_tapex.py \
|
|||
--learning_rate 3e-5 \
|
||||
--eval_steps 1000 \
|
||||
--save_steps 1000 \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--weight_decay 1e-2 \
|
||||
--max_steps 30000 \
|
||||
--max_grad_norm 0.1
|
||||
|
@ -252,7 +252,7 @@ python run_tabfact_with_tapex.py \
|
|||
--learning_rate 3e-5 \
|
||||
--eval_steps 1000 \
|
||||
--save_steps 1000 \
|
||||
--evaluation_strategy steps \
|
||||
--eval_strategy steps \
|
||||
--weight_decay 1e-2 \
|
||||
--max_steps 30000 \
|
||||
--max_grad_norm 0.1
|
||||
|
|
|
@ -182,7 +182,7 @@ Here we will run the script on the *Turkish* Common Voice dataset for demonstrat
|
|||
--per_device_train_batch_size="16" \
|
||||
--learning_rate="3e-4" \
|
||||
--warmup_steps="500" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--save_steps="400" \
|
||||
--eval_steps="400" \
|
||||
--logging_steps="400" \
|
||||
|
@ -209,7 +209,7 @@ Here we will run the script on the *Turkish* Common Voice dataset for demonstrat
|
|||
--per_device_train_batch_size="16" \
|
||||
--learning_rate="3e-4" \
|
||||
--warmup_steps="500" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--save_steps="400" \
|
||||
--eval_steps="400" \
|
||||
--logging_steps="400" \
|
||||
|
|
|
@ -18,7 +18,7 @@ python run_asr.py \
|
|||
--num_train_epochs="30" \
|
||||
--per_device_train_batch_size="20" \
|
||||
--per_device_eval_batch_size="20" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--save_steps="500" \
|
||||
--eval_steps="100" \
|
||||
--logging_steps="50" \
|
||||
|
@ -73,7 +73,7 @@ python run_asr.py \
|
|||
--per_device_train_batch_size="1" \
|
||||
--per_device_eval_batch_size="1" \
|
||||
--gradient_accumulation_steps="8" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--save_steps="500" \
|
||||
--eval_steps="100" \
|
||||
--logging_steps="50" \
|
||||
|
@ -152,7 +152,7 @@ ZeRO-2:
|
|||
PYTHONPATH=../../../src deepspeed --num_gpus 2 \
|
||||
run_asr.py \
|
||||
--output_dir=output_dir --num_train_epochs=2 --per_device_train_batch_size=2 \
|
||||
--per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \
|
||||
--per_device_eval_batch_size=2 --eval_strategy=steps --save_steps=500 --eval_steps=100 \
|
||||
--logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \
|
||||
--model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \
|
||||
--dataset_name=hf-internal-testing/librispeech_asr_dummy --dataset_config_name=clean \
|
||||
|
@ -176,7 +176,7 @@ ZeRO-3:
|
|||
PYTHONPATH=../../../src deepspeed --num_gpus 2 \
|
||||
run_asr.py \
|
||||
--output_dir=output_dir --num_train_epochs=2 --per_device_train_batch_size=2 \
|
||||
--per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \
|
||||
--per_device_eval_batch_size=2 --eval_strategy=steps --save_steps=500 --eval_steps=100 \
|
||||
--logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \
|
||||
--model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \
|
||||
--dataset_name=hf-internal-testing/librispeech_asr_dummy --dataset_config_name=clean \
|
||||
|
|
|
@ -4,7 +4,7 @@ python run_asr.py \
|
|||
--num_train_epochs="30" \
|
||||
--per_device_train_batch_size="32" \
|
||||
--per_device_eval_batch_size="32" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--save_total_limit="3" \
|
||||
--save_steps="500" \
|
||||
--eval_steps="100" \
|
||||
|
|
|
@ -4,7 +4,7 @@ python run_asr.py \
|
|||
--num_train_epochs="30" \
|
||||
--per_device_train_batch_size="20" \
|
||||
--per_device_eval_batch_size="20" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--save_steps="500" \
|
||||
--eval_steps="100" \
|
||||
--logging_steps="50" \
|
||||
|
|
|
@ -4,7 +4,7 @@ python run_asr.py \
|
|||
--num_train_epochs="30" \
|
||||
--per_device_train_batch_size="16" \
|
||||
--per_device_eval_batch_size="16" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--save_total_limit="3" \
|
||||
--save_steps="500" \
|
||||
--eval_steps="100" \
|
||||
|
|
|
@ -5,7 +5,7 @@ python run_asr.py \
|
|||
--per_device_train_batch_size="2" \
|
||||
--per_device_eval_batch_size="2" \
|
||||
--gradient_accumulation_steps="4" \
|
||||
--evaluation_strategy="steps" \
|
||||
--eval_strategy="steps" \
|
||||
--save_steps="500" \
|
||||
--eval_steps="100" \
|
||||
--logging_steps="50" \
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue