From 1f843991716bba3b73071f1a2d819b0aebc05375 Mon Sep 17 00:00:00 2001
From: atturaioe <76523524+atturaioe@users.noreply.github.com>
Date: Mon, 1 Aug 2022 14:40:25 +0300
Subject: [PATCH] Migrate metric to Evaluate in Pytorch examples (#18369)

* Migrate metric to Evaluate in pytorch examples

* Remove unused imports
---
 examples/pytorch/_tests_requirements.txt | 1 +
 .../audio-classification/run_audio_classification.py | 3 ++-
 .../image-classification/run_image_classification.py | 4 ++--
 .../run_image_classification_no_trainer.py | 5 +++--
 examples/pytorch/language-modeling/run_clm.py | 5 +++--
 examples/pytorch/language-modeling/run_mlm.py | 5 +++--
 examples/pytorch/multiple-choice/run_swag_no_trainer.py | 5 +++--
 examples/pytorch/question-answering/run_qa.py | 5 +++--
 examples/pytorch/question-answering/run_qa_beam_search.py | 5 +++--
 .../question-answering/run_qa_beam_search_no_trainer.py | 5 +++--
 examples/pytorch/question-answering/run_qa_no_trainer.py | 5 +++--
 examples/pytorch/question-answering/run_seq2seq_qa.py | 5 +++--
 .../semantic-segmentation/run_semantic_segmentation.py | 4 ++--
 .../run_semantic_segmentation_no_trainer.py | 5 +++--
 .../speech-recognition/run_speech_recognition_ctc.py | 5 +++--
 .../speech-recognition/run_speech_recognition_seq2seq.py | 5 +++--
 examples/pytorch/summarization/run_summarization.py | 5 +++--
 .../pytorch/summarization/run_summarization_no_trainer.py | 5 +++--
 examples/pytorch/text-classification/run_glue.py | 7 ++++---
 .../pytorch/text-classification/run_glue_no_trainer.py | 7 ++++---
 examples/pytorch/text-classification/run_xnli.py | 5 +++--
 examples/pytorch/token-classification/run_ner.py | 5 +++--
 .../pytorch/token-classification/run_ner_no_trainer.py | 5 +++--
 examples/pytorch/translation/run_translation.py | 5 +++--
 examples/pytorch/translation/run_translation_no_trainer.py | 5 +++--
 25 files changed, 72 insertions(+), 49 deletions(-)

diff --git a/examples/pytorch/_tests_requirements.txt b/examples/pytorch/_tests_requirements.txt
index 6e2dd71816..edaae597ab 100644
--- a/examples/pytorch/_tests_requirements.txt
+++ b/examples/pytorch/_tests_requirements.txt
@@ -23,3 +23,4 @@ torchvision
 jiwer
 librosa
 torch < 1.12
+evaluate
diff --git a/examples/pytorch/audio-classification/run_audio_classification.py b/examples/pytorch/audio-classification/run_audio_classification.py
index 8c380033ba..6c2a6cb880 100644
--- a/examples/pytorch/audio-classification/run_audio_classification.py
+++ b/examples/pytorch/audio-classification/run_audio_classification.py
@@ -26,6 +26,7 @@ import datasets
 import numpy as np
 from datasets import DatasetDict, load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -315,7 +316,7 @@ def main():
         id2label[str(i)] = label
 
     # Load the accuracy metric from the datasets package
-    metric = datasets.load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with
     # `predictions` and `label_ids` fields) and has to return a dictionary string to float.
diff --git a/examples/pytorch/image-classification/run_image_classification.py b/examples/pytorch/image-classification/run_image_classification.py
index 1c75b53b47..f8c2c95f59 100644
--- a/examples/pytorch/image-classification/run_image_classification.py
+++ b/examples/pytorch/image-classification/run_image_classification.py
@@ -19,7 +19,6 @@ import sys
 from dataclasses import dataclass, field
 from typing import Optional
 
-import datasets
 import numpy as np
 import torch
 from datasets import load_dataset
@@ -34,6 +33,7 @@ from torchvision.transforms import (
     ToTensor,
 )
 
+import evaluate
 import transformers
 from transformers import (
     MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
@@ -252,7 +252,7 @@ def main():
         id2label[str(i)] = label
 
     # Load the accuracy metric from the datasets package
-    metric = datasets.load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.
diff --git a/examples/pytorch/image-classification/run_image_classification_no_trainer.py b/examples/pytorch/image-classification/run_image_classification_no_trainer.py
index e59cc490f7..773e0265d3 100644
--- a/examples/pytorch/image-classification/run_image_classification_no_trainer.py
+++ b/examples/pytorch/image-classification/run_image_classification_no_trainer.py
@@ -22,7 +22,7 @@ from pathlib import Path
 
 import datasets
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from torchvision.transforms import (
     CenterCrop,
@@ -35,6 +35,7 @@ from torchvision.transforms import (
 )
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -415,7 +416,7 @@ def main():
         accelerator.init_trackers("image_classification_no_trainer", experiment_config)
 
     # Get the metric function
-    metric = load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py
index 759dfe204a..53052d7671 100755
--- a/examples/pytorch/language-modeling/run_clm.py
+++ b/examples/pytorch/language-modeling/run_clm.py
@@ -30,8 +30,9 @@ from itertools import chain
 from typing import Optional
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     CONFIG_MAPPING,
@@ -492,7 +493,7 @@ def main():
             logits = logits[0]
         return logits.argmax(dim=-1)
 
-    metric = load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     def compute_metrics(eval_preds):
         preds, labels = eval_preds
diff --git a/examples/pytorch/language-modeling/run_mlm.py b/examples/pytorch/language-modeling/run_mlm.py
index 29742eba98..dcc8bcd3cd 100755
--- a/examples/pytorch/language-modeling/run_mlm.py
+++ b/examples/pytorch/language-modeling/run_mlm.py
@@ -30,8 +30,9 @@ from itertools import chain
 from typing import Optional
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     CONFIG_MAPPING,
@@ -515,7 +516,7 @@ def main():
             logits = logits[0]
         return logits.argmax(dim=-1)
 
-    metric = load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     def compute_metrics(eval_preds):
         preds, labels = eval_preds
diff --git a/examples/pytorch/multiple-choice/run_swag_no_trainer.py b/examples/pytorch/multiple-choice/run_swag_no_trainer.py
index fb22d54b20..d72be5d95b 100755
--- a/examples/pytorch/multiple-choice/run_swag_no_trainer.py
+++ b/examples/pytorch/multiple-choice/run_swag_no_trainer.py
@@ -31,10 +31,11 @@ from typing import Optional, Union
 
 import datasets
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -514,7 +515,7 @@ def main():
         accelerator.init_trackers("swag_no_trainer", experiment_config)
 
     # Metrics
-    metric = load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
diff --git a/examples/pytorch/question-answering/run_qa.py b/examples/pytorch/question-answering/run_qa.py
index 68fb457616..cddcb4891b 100755
--- a/examples/pytorch/question-answering/run_qa.py
+++ b/examples/pytorch/question-answering/run_qa.py
@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
 from typing import Optional
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from trainer_qa import QuestionAnsweringTrainer
 from transformers import (
@@ -593,7 +594,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)
 
-    metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
 
     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)
diff --git a/examples/pytorch/question-answering/run_qa_beam_search.py b/examples/pytorch/question-answering/run_qa_beam_search.py
index e9db4632f9..1c389e43f3 100755
--- a/examples/pytorch/question-answering/run_qa_beam_search.py
+++ b/examples/pytorch/question-answering/run_qa_beam_search.py
@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
 from typing import Optional
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from trainer_qa import QuestionAnsweringTrainer
 from transformers import (
@@ -625,7 +626,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)
 
-    metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
 
     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)
diff --git a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
index eca4c60a6d..e6c66e379a 100644
--- a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
+++ b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
@@ -29,10 +29,11 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -680,7 +681,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)
 
-    metric = load_metric("squad_v2" if args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if args.version_2_with_negative else "squad")
 
     def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
         """
diff --git a/examples/pytorch/question-answering/run_qa_no_trainer.py b/examples/pytorch/question-answering/run_qa_no_trainer.py
index fe839f28b1..ec86d95b5e 100755
--- a/examples/pytorch/question-answering/run_qa_no_trainer.py
+++ b/examples/pytorch/question-answering/run_qa_no_trainer.py
@@ -29,10 +29,11 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -696,7 +697,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)
 
-    metric = load_metric("squad_v2" if args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if args.version_2_with_negative else "squad")
 
     # Create and fill numpy array of size len_of_validation_data * max_length_of_output_tensor
     def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
diff --git a/examples/pytorch/question-answering/run_seq2seq_qa.py b/examples/pytorch/question-answering/run_seq2seq_qa.py
index b460f8d9d5..c3c85b31da 100644
--- a/examples/pytorch/question-answering/run_seq2seq_qa.py
+++ b/examples/pytorch/question-answering/run_seq2seq_qa.py
@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
 from typing import List, Optional, Tuple
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from trainer_seq2seq_qa import QuestionAnsweringSeq2SeqTrainer
 from transformers import (
@@ -581,7 +582,7 @@ def main():
         pad_to_multiple_of=8 if training_args.fp16 else None,
     )
 
-    metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
 
     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)
diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
index a3fe5734b0..92d07f8f91 100644
--- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
+++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
@@ -21,7 +21,6 @@ import sys
 from dataclasses import dataclass, field
 from typing import Optional
 
-import datasets
 import numpy as np
 import torch
 from datasets import load_dataset
@@ -30,6 +29,7 @@ from torch import nn
 from torchvision import transforms
 from torchvision.transforms import functional
 
+import evaluate
 import transformers
 from huggingface_hub import hf_hub_download
 from transformers import (
@@ -337,7 +337,7 @@ def main():
     label2id = {v: str(k) for k, v in id2label.items()}
 
     # Load the mean IoU metric from the datasets package
-    metric = datasets.load_metric("mean_iou")
+    metric = evaluate.load("mean_iou")
 
     # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.
diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
index 35d069a747..c7ac9da3e9 100644
--- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
+++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
@@ -24,13 +24,14 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from PIL import Image
 from torch.utils.data import DataLoader
 from torchvision import transforms
 from torchvision.transforms import functional
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -500,7 +501,7 @@ def main():
         args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
 
     # Instantiate metric
-    metric = load_metric("mean_iou")
+    metric = evaluate.load("mean_iou")
 
     # We need to initialize the trackers we use, and also store our configuration.
     # We initialize the trackers only on main process because `accelerator.log`
diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
index 720dfd1ea0..fdbed2b3ab 100755
--- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
+++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
@@ -28,8 +28,9 @@ from typing import Dict, List, Optional, Union
 import datasets
 import numpy as np
 import torch
-from datasets import DatasetDict, load_dataset, load_metric
+from datasets import DatasetDict, load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -643,7 +644,7 @@ def main():
     # instantiate a data collator and the trainer
 
     # Define evaluation metrics during training, *i.e.* word error rate, character error rate
-    eval_metrics = {metric: load_metric(metric) for metric in data_args.eval_metrics}
+    eval_metrics = {metric: evaluate.load(metric) for metric in data_args.eval_metrics}
 
     # for large datasets it is advised to run the preprocessing on a
     # single machine first with ``args.preprocessing_only`` since there will mostly likely
diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py b/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py
index 608bb953c9..0ce8ff0550 100755
--- a/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py
+++ b/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py
@@ -27,8 +27,9 @@ from typing import Any, Dict, List, Optional, Union
 
 import datasets
 import torch
-from datasets import DatasetDict, load_dataset, load_metric
+from datasets import DatasetDict, load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -425,7 +426,7 @@ def main():
         return
 
     # 8. Load Metric
-    metric = load_metric("wer")
+    metric = evaluate.load("wer")
 
     def compute_metrics(pred):
         pred_ids = pred.predictions
diff --git a/examples/pytorch/summarization/run_summarization.py b/examples/pytorch/summarization/run_summarization.py
index 031ac25fa2..636b7a60c4 100755
--- a/examples/pytorch/summarization/run_summarization.py
+++ b/examples/pytorch/summarization/run_summarization.py
@@ -27,8 +27,9 @@ from typing import Optional
 import datasets
 import nltk  # Here to have a nice missing dependency error message early on
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from filelock import FileLock
 from transformers import (
@@ -598,7 +599,7 @@ def main():
     )
 
     # Metric
-    metric = load_metric("rouge")
+    metric = evaluate.load("rouge")
 
     def postprocess_text(preds, labels):
         preds = [pred.strip() for pred in preds]
diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py
index 16f24cbdab..dd15b331c9 100644
--- a/examples/pytorch/summarization/run_summarization_no_trainer.py
+++ b/examples/pytorch/summarization/run_summarization_no_trainer.py
@@ -30,10 +30,11 @@ import datasets
 import nltk
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -583,7 +584,7 @@ def main():
         accelerator.init_trackers("summarization_no_trainer", experiment_config)
 
     # Metric
-    metric = load_metric("rouge")
+    metric = evaluate.load("rouge")
 
     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py
index ebd515d24e..556e1f3bbe 100755
--- a/examples/pytorch/text-classification/run_glue.py
+++ b/examples/pytorch/text-classification/run_glue.py
@@ -25,8 +25,9 @@ from typing import Optional
 
 import datasets
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -480,9 +481,9 @@ def main():
 
     # Get the metric function
     if data_args.task_name is not None:
-        metric = load_metric("glue", data_args.task_name)
+        metric = evaluate.load("glue", data_args.task_name)
     else:
-        metric = load_metric("accuracy")
+        metric = evaluate.load("accuracy")
 
     # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.
diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py
index 4a022db69c..326515c2ec 100644
--- a/examples/pytorch/text-classification/run_glue_no_trainer.py
+++ b/examples/pytorch/text-classification/run_glue_no_trainer.py
@@ -23,10 +23,11 @@ from pathlib import Path
 
 import datasets
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -466,9 +467,9 @@ def main():
 
     # Get the metric function
     if args.task_name is not None:
-        metric = load_metric("glue", args.task_name)
+        metric = evaluate.load("glue", args.task_name)
     else:
-        metric = load_metric("accuracy")
+        metric = evaluate.load("accuracy")
 
     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
diff --git a/examples/pytorch/text-classification/run_xnli.py b/examples/pytorch/text-classification/run_xnli.py
index fc7e1a80ff..2450d24e3d 100755
--- a/examples/pytorch/text-classification/run_xnli.py
+++ b/examples/pytorch/text-classification/run_xnli.py
@@ -26,8 +26,9 @@ from typing import Optional
 
 import datasets
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -349,7 +350,7 @@ def main():
     )
 
     # Get the metric function
-    metric = load_metric("xnli")
+    metric = evaluate.load("xnli")
 
     # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.
diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py
index c2c4274259..13993e58a4 100755
--- a/examples/pytorch/token-classification/run_ner.py
+++ b/examples/pytorch/token-classification/run_ner.py
@@ -27,8 +27,9 @@ from typing import Optional
 
 import datasets
 import numpy as np
-from datasets import ClassLabel, load_dataset, load_metric
+from datasets import ClassLabel, load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -504,7 +505,7 @@ def main():
     data_collator = DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None)
 
     # Metrics
-    metric = load_metric("seqeval")
+    metric = evaluate.load("seqeval")
 
     def compute_metrics(p):
         predictions, labels = p
diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py
index e6fbc8bfd0..5027f93823 100755
--- a/examples/pytorch/token-classification/run_ner_no_trainer.py
+++ b/examples/pytorch/token-classification/run_ner_no_trainer.py
@@ -28,10 +28,11 @@ from pathlib import Path
 
 import datasets
 import torch
-from datasets import ClassLabel, load_dataset, load_metric
+from datasets import ClassLabel, load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -580,7 +581,7 @@ def main():
         accelerator.init_trackers("ner_no_trainer", experiment_config)
 
     # Metrics
-    metric = load_metric("seqeval")
+    metric = evaluate.load("seqeval")
 
     def get_labels(predictions, references):
         # Transform predictions and references tensos to numpy arrays
diff --git a/examples/pytorch/translation/run_translation.py b/examples/pytorch/translation/run_translation.py
index 0f21eb2733..4d03cbf49c 100755
--- a/examples/pytorch/translation/run_translation.py
+++ b/examples/pytorch/translation/run_translation.py
@@ -26,8 +26,9 @@ from typing import Optional
 
 import datasets
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -522,7 +523,7 @@ def main():
     )
 
     # Metric
-    metric = load_metric("sacrebleu")
+    metric = evaluate.load("sacrebleu")
 
     def postprocess_text(preds, labels):
         preds = [pred.strip() for pred in preds]
diff --git a/examples/pytorch/translation/run_translation_no_trainer.py b/examples/pytorch/translation/run_translation_no_trainer.py
index 6db6e11c50..914181511b 100644
--- a/examples/pytorch/translation/run_translation_no_trainer.py
+++ b/examples/pytorch/translation/run_translation_no_trainer.py
@@ -29,10 +29,11 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -562,7 +563,7 @@ def main():
         experiment_config["lr_scheduler_type"] = experiment_config["lr_scheduler_type"].value
         accelerator.init_trackers("translation_no_trainer", experiment_config)
 
-    metric = load_metric("sacrebleu")
+    metric = evaluate.load("sacrebleu")
 
     def postprocess_text(preds, labels):
         preds = [pred.strip() for pred in preds]
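Note: the snippet below is an illustrative sketch, not part of the patch above. It shows the usage pattern the patch migrates to: `datasets.load_metric(...)` / `load_metric(...)` becomes `evaluate.load(...)`, while the returned metric object keeps the same `compute()` interface. It assumes the `evaluate` package is installed (as added to _tests_requirements.txt above); the variable names `preds` and `labels` are hypothetical.

    # Old (datasets): from datasets import load_metric; metric = load_metric("accuracy")
    # New (evaluate), as applied throughout this patch:
    import evaluate

    metric = evaluate.load("accuracy")
    preds = [0, 1, 1]   # hypothetical model predictions
    labels = [0, 1, 0]  # hypothetical gold references
    # compute() takes the same predictions/references keyword arguments as before
    result = metric.compute(predictions=preds, references=labels)
    print(result)  # e.g. {'accuracy': 0.6666666666666666}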