Migrate metric to Evaluate in Pytorch examples (#18369)

* Migrate metric to Evaluate in pytorch examples

* Remove unused imports
Author: atturaioe, 2022-08-01 14:40:25 +03:00 (committed by GitHub)
parent 25ec12eaf7
commit 1f84399171
25 changed files with 72 additions and 49 deletions

View File

@ -23,3 +23,4 @@ torchvision
jiwer
librosa
torch < 1.12
+evaluate

View File

@ -26,6 +26,7 @@ import datasets
import numpy as np
from datasets import DatasetDict, load_dataset
+import evaluate
import transformers
from transformers import (
AutoConfig,
@ -315,7 +316,7 @@ def main():
id2label[str(i)] = label
# Load the accuracy metric from the datasets package
-metric = datasets.load_metric("accuracy")
+metric = evaluate.load("accuracy")
# Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with
# `predictions` and `label_ids` fields) and has to return a dictionary string to float.
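The migrated `evaluate.load("accuracy")` keeps the same `compute()` interface as the old `datasets.load_metric`, so the surrounding `compute_metrics` functions need no further changes. A minimal sketch of such a function (the names and the argmax step are illustrative, not taken from this diff):

```python
import evaluate
import numpy as np

metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    # eval_pred behaves like an EvalPrediction: (predictions, label_ids)
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    # Same call signature as before the migration; returns e.g. {"accuracy": 0.93}
    return metric.compute(predictions=predictions, references=labels)
```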

View File

@ -19,7 +19,6 @@ import sys
from dataclasses import dataclass, field
from typing import Optional
-import datasets
import numpy as np
import torch
from datasets import load_dataset
@ -34,6 +33,7 @@ from torchvision.transforms import (
ToTensor,
)
+import evaluate
import transformers
from transformers import (
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
@ -252,7 +252,7 @@ def main():
id2label[str(i)] = label
# Load the accuracy metric from the datasets package
-metric = datasets.load_metric("accuracy")
+metric = evaluate.load("accuracy")
# Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
# predictions and label_ids field) and has to return a dictionary string to float.

View File

@ -22,7 +22,7 @@ from pathlib import Path
import datasets
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from torch.utils.data import DataLoader
from torchvision.transforms import (
CenterCrop,
@ -35,6 +35,7 @@ from torchvision.transforms import (
)
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@ -415,7 +416,7 @@ def main():
accelerator.init_trackers("image_classification_no_trainer", experiment_config)
# Get the metric function
-metric = load_metric("accuracy")
+metric = evaluate.load("accuracy")
# Train!
total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
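In the `*_no_trainer` scripts the metric is fed batch by batch inside the evaluation loop; `evaluate` metrics keep the `add_batch()`/`compute()` API of the old `datasets` metrics. A hedged sketch of that pattern (the helper name, the `device` argument, and the plain PyTorch loop are assumptions for illustration; the real scripts gather tensors through Accelerate first):

```python
import evaluate
import torch

def evaluate_accuracy(model, eval_dataloader, device="cpu"):
    """Illustrative no_trainer-style eval loop: accumulate per batch, compute once."""
    metric = evaluate.load("accuracy")
    model.eval()
    for batch in eval_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
        predictions = outputs.logits.argmax(dim=-1)
        metric.add_batch(
            predictions=predictions.cpu().numpy(),
            references=batch["labels"].cpu().numpy(),
        )
    return metric.compute()  # e.g. {"accuracy": 0.87}
```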

View File

@ -30,8 +30,9 @@ from itertools import chain
from typing import Optional
import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from transformers import (
CONFIG_MAPPING,
@ -492,7 +493,7 @@ def main():
logits = logits[0]
return logits.argmax(dim=-1)
-metric = load_metric("accuracy")
+metric = evaluate.load("accuracy")
def compute_metrics(eval_preds):
preds, labels = eval_preds

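For the language-modeling scripts the metric scores token-level accuracy. The body of `compute_metrics` is cut off above; a plausible continuation (a sketch reconstructed from the surrounding code, not quoted verbatim from the file) shifts the labels one position before calling `compute()`:

```python
import evaluate

metric = evaluate.load("accuracy")

def compute_metrics(eval_preds):
    preds, labels = eval_preds
    # preds already have the same shape as labels because
    # preprocess_logits_for_metrics returned logits.argmax(dim=-1)
    labels = labels[:, 1:].reshape(-1)
    preds = preds[:, :-1].reshape(-1)
    return metric.compute(predictions=preds, references=labels)
```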
View File

@ -30,8 +30,9 @@ from itertools import chain
from typing import Optional
import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from transformers import (
CONFIG_MAPPING,
@ -515,7 +516,7 @@ def main():
logits = logits[0]
return logits.argmax(dim=-1)
-metric = load_metric("accuracy")
+metric = evaluate.load("accuracy")
def compute_metrics(eval_preds):
preds, labels = eval_preds

View File

@ -31,10 +31,11 @@ from typing import Optional, Union
import datasets
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@ -514,7 +515,7 @@ def main():
accelerator.init_trackers("swag_no_trainer", experiment_config)
# Metrics
-metric = load_metric("accuracy")
+metric = evaluate.load("accuracy")
# Train!
total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

View File

@ -25,8 +25,9 @@ from dataclasses import dataclass, field
from typing import Optional
import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from trainer_qa import QuestionAnsweringTrainer
from transformers import (
@ -593,7 +594,7 @@ def main():
references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
return EvalPrediction(predictions=formatted_predictions, label_ids=references)
-metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
def compute_metrics(p: EvalPrediction):
return metric.compute(predictions=p.predictions, references=p.label_ids)
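The `squad`/`squad_v2` metrics also keep their input format under `evaluate`: predictions carry an `id` and a `prediction_text`, references carry the `answers` dict, and `squad_v2` predictions additionally expect a `no_answer_probability`. A small self-contained example (the sample id and text are made up):

```python
import evaluate

squad_metric = evaluate.load("squad")

predictions = [{"id": "q-001", "prediction_text": "Denver Broncos"}]
references = [
    {"id": "q-001", "answers": {"text": ["Denver Broncos"], "answer_start": [177]}}
]

print(squad_metric.compute(predictions=predictions, references=references))
# -> {"exact_match": 100.0, "f1": 100.0}
```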

View File

@ -25,8 +25,9 @@ from dataclasses import dataclass, field
from typing import Optional
import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from trainer_qa import QuestionAnsweringTrainer
from transformers import (
@ -625,7 +626,7 @@ def main():
references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
return EvalPrediction(predictions=formatted_predictions, label_ids=references)
-metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
def compute_metrics(p: EvalPrediction):
return metric.compute(predictions=p.predictions, references=p.label_ids)

View File

@ -29,10 +29,11 @@ from pathlib import Path
import datasets
import numpy as np
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@ -680,7 +681,7 @@ def main():
references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
return EvalPrediction(predictions=formatted_predictions, label_ids=references)
-metric = load_metric("squad_v2" if args.version_2_with_negative else "squad")
+metric = evaluate.load("squad_v2" if args.version_2_with_negative else "squad")
def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
"""

View File

@ -29,10 +29,11 @@ from pathlib import Path
import datasets
import numpy as np
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@ -696,7 +697,7 @@ def main():
references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
return EvalPrediction(predictions=formatted_predictions, label_ids=references)
-metric = load_metric("squad_v2" if args.version_2_with_negative else "squad")
+metric = evaluate.load("squad_v2" if args.version_2_with_negative else "squad")
# Create and fill numpy array of size len_of_validation_data * max_length_of_output_tensor
def create_and_fill_np_array(start_or_end_logits, dataset, max_len):

View File

@ -25,8 +25,9 @@ from dataclasses import dataclass, field
from typing import List, Optional, Tuple
import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from trainer_seq2seq_qa import QuestionAnsweringSeq2SeqTrainer
from transformers import (
@ -581,7 +582,7 @@ def main():
pad_to_multiple_of=8 if training_args.fp16 else None,
)
-metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
def compute_metrics(p: EvalPrediction):
return metric.compute(predictions=p.predictions, references=p.label_ids)

View File

@ -21,7 +21,6 @@ import sys
from dataclasses import dataclass, field
from typing import Optional
-import datasets
import numpy as np
import torch
from datasets import load_dataset
@ -30,6 +29,7 @@ from torch import nn
from torchvision import transforms
from torchvision.transforms import functional
+import evaluate
import transformers
from huggingface_hub import hf_hub_download
from transformers import (
@ -337,7 +337,7 @@ def main():
label2id = {v: str(k) for k, v in id2label.items()}
# Load the mean IoU metric from the datasets package
-metric = datasets.load_metric("mean_iou")
+metric = evaluate.load("mean_iou")
# Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
# predictions and label_ids field) and has to return a dictionary string to float.
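The `mean_iou` metric takes whole segmentation maps plus a few extra arguments (`num_labels`, `ignore_index`, `reduce_labels`). A tiny sketch of the call with made-up 2x2 maps; the exact set of returned keys may vary slightly across `evaluate` versions:

```python
import evaluate
import numpy as np

metric = evaluate.load("mean_iou")

predicted = [np.array([[0, 1], [1, 1]])]     # predicted label ids
ground_truth = [np.array([[0, 1], [0, 1]])]  # reference label ids

results = metric.compute(
    predictions=predicted,
    references=ground_truth,
    num_labels=2,
    ignore_index=255,     # label id excluded from scoring
    reduce_labels=False,
)
# results includes "mean_iou", "mean_accuracy", "overall_accuracy"
# and per-category arrays.
```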

View File

@ -24,13 +24,14 @@ from pathlib import Path
import datasets
import numpy as np
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from PIL import Image
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.transforms import functional
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@ -500,7 +501,7 @@ def main():
args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
# Instantiate metric
-metric = load_metric("mean_iou")
+metric = evaluate.load("mean_iou")
# We need to initialize the trackers we use, and also store our configuration.
# We initialize the trackers only on main process because `accelerator.log`

View File

@ -28,8 +28,9 @@ from typing import Dict, List, Optional, Union
import datasets
import numpy as np
import torch
-from datasets import DatasetDict, load_dataset, load_metric
+from datasets import DatasetDict, load_dataset
+import evaluate
import transformers
from transformers import (
AutoConfig,
@ -643,7 +644,7 @@ def main():
# instantiate a data collator and the trainer
# Define evaluation metrics during training, *i.e.* word error rate, character error rate
-eval_metrics = {metric: load_metric(metric) for metric in data_args.eval_metrics}
+eval_metrics = {metric: evaluate.load(metric) for metric in data_args.eval_metrics}
# for large datasets it is advised to run the preprocessing on a
# single machine first with ``args.preprocessing_only`` since there will mostly likely

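Here several metrics (word error rate, character error rate) are loaded into a dict keyed by name. Both `wer` and `cer` take plain strings, rely on the `jiwer` package listed in the requirements above, and return a single float. A hedged sketch of how such a dict can be used, with made-up transcriptions:

```python
import evaluate

eval_metrics = {name: evaluate.load(name) for name in ["wer", "cer"]}

predictions = ["hello wrld", "good morning"]
references = ["hello world", "good morning"]

scores = {
    name: metric.compute(predictions=predictions, references=references)
    for name, metric in eval_metrics.items()
}
# e.g. {"wer": 0.25, "cer": ...}; each metric returns one float
```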
View File

@ -27,8 +27,9 @@ from typing import Any, Dict, List, Optional, Union
import datasets
import torch
-from datasets import DatasetDict, load_dataset, load_metric
+from datasets import DatasetDict, load_dataset
+import evaluate
import transformers
from transformers import (
AutoConfig,
@ -425,7 +426,7 @@ def main():
return
# 8. Load Metric
-metric = load_metric("wer")
+metric = evaluate.load("wer")
def compute_metrics(pred):
pred_ids = pred.predictions

View File

@ -27,8 +27,9 @@ from typing import Optional
import datasets
import nltk # Here to have a nice missing dependency error message early on
import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from filelock import FileLock
from transformers import (
@ -598,7 +599,7 @@ def main():
)
# Metric
-metric = load_metric("rouge")
+metric = evaluate.load("rouge")
def postprocess_text(preds, labels):
preds = [pred.strip() for pred in preds]

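ROUGE keeps the same call as well; note that the value type of the results has differed between the old `datasets` metric and newer `evaluate` releases (aggregate score objects vs. plain floats), so treat output handling as version-dependent. A minimal sketch with made-up strings:

```python
import evaluate

rouge = evaluate.load("rouge")

predictions = ["the cat sat on the mat"]
references = ["the cat was sitting on the mat"]

results = rouge.compute(predictions=predictions, references=references, use_stemmer=True)
# results is keyed by "rouge1", "rouge2", "rougeL", "rougeLsum"
```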
View File

@ -30,10 +30,11 @@ import datasets
import nltk
import numpy as np
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@ -583,7 +584,7 @@ def main():
accelerator.init_trackers("summarization_no_trainer", experiment_config)
# Metric
-metric = load_metric("rouge")
+metric = evaluate.load("rouge")
# Train!
total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

View File

@ -25,8 +25,9 @@ from typing import Optional
import datasets
import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from transformers import (
AutoConfig,
@ -480,9 +481,9 @@ def main():
# Get the metric function
if data_args.task_name is not None:
-metric = load_metric("glue", data_args.task_name)
+metric = evaluate.load("glue", data_args.task_name)
else:
-metric = load_metric("accuracy")
+metric = evaluate.load("accuracy")
# You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
# predictions and label_ids field) and has to return a dictionary string to float.
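A quick illustration of the two branches above: with a task name, `evaluate.load("glue", task)` returns the task-specific metric (MRPC, used here only as an example, reports accuracy and F1); without one, the script falls back to plain accuracy:

```python
import evaluate

# GLUE metric for a specific task (MRPC chosen only for illustration)
mrpc_metric = evaluate.load("glue", "mrpc")
print(mrpc_metric.compute(predictions=[0, 1, 1], references=[0, 1, 0]))
# -> {"accuracy": ..., "f1": ...}

# Fallback branch: plain accuracy
accuracy = evaluate.load("accuracy")
print(accuracy.compute(predictions=[0, 1, 1], references=[0, 1, 0]))
# -> {"accuracy": 0.666...}
```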

View File

@ -23,10 +23,11 @@ from pathlib import Path
import datasets
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@ -466,9 +467,9 @@ def main():
# Get the metric function
if args.task_name is not None:
-metric = load_metric("glue", args.task_name)
+metric = evaluate.load("glue", args.task_name)
else:
-metric = load_metric("accuracy")
+metric = evaluate.load("accuracy")
# Train!
total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

View File

@ -26,8 +26,9 @@ from typing import Optional
import datasets
import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from transformers import (
AutoConfig,
@ -349,7 +350,7 @@ def main():
)
# Get the metric function
-metric = load_metric("xnli")
+metric = evaluate.load("xnli")
# You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
# predictions and label_ids field) and has to return a dictionary string to float.

View File

@ -27,8 +27,9 @@ from typing import Optional
import datasets
import numpy as np
-from datasets import ClassLabel, load_dataset, load_metric
+from datasets import ClassLabel, load_dataset
+import evaluate
import transformers
from transformers import (
AutoConfig,
@ -504,7 +505,7 @@ def main():
data_collator = DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None)
# Metrics
-metric = load_metric("seqeval")
+metric = evaluate.load("seqeval")
def compute_metrics(p):
predictions, labels = p

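`seqeval` expects lists of label strings per sequence (with special tokens, label id -100 in these scripts, already filtered out) and needs the `seqeval` package installed. A small sketch with invented tags:

```python
import evaluate

seqeval = evaluate.load("seqeval")  # requires the seqeval package

predictions = [["B-PER", "I-PER", "O", "B-LOC"]]
references = [["B-PER", "I-PER", "O", "O"]]

results = seqeval.compute(predictions=predictions, references=references)
# results includes "overall_precision", "overall_recall", "overall_f1",
# "overall_accuracy" and per-entity-type scores
```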
View File

@ -28,10 +28,11 @@ from pathlib import Path
import datasets
import torch
-from datasets import ClassLabel, load_dataset, load_metric
+from datasets import ClassLabel, load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@ -580,7 +581,7 @@ def main():
accelerator.init_trackers("ner_no_trainer", experiment_config)
# Metrics
-metric = load_metric("seqeval")
+metric = evaluate.load("seqeval")
def get_labels(predictions, references):
# Transform predictions and references tensors to numpy arrays

View File

@ -26,8 +26,9 @@ from typing import Optional
import datasets
import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from transformers import (
AutoConfig,
@ -522,7 +523,7 @@ def main():
)
# Metric
-metric = load_metric("sacrebleu")
+metric = evaluate.load("sacrebleu")
def postprocess_text(preds, labels):
preds = [pred.strip() for pred in preds]

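For `sacrebleu`, each prediction is a string and each reference entry is a list of reference strings (one inner list per prediction); the result dict exposes the BLEU value under "score" on a 0-100 scale. Sketch with a made-up sentence:

```python
import evaluate

sacrebleu = evaluate.load("sacrebleu")

predictions = ["the cat sat on the mat"]
references = [["the cat sat on the mat"]]

results = sacrebleu.compute(predictions=predictions, references=references)
print(results["score"])  # 100.0 for an exact match
```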
View File

@ -29,10 +29,11 @@ from pathlib import Path
import datasets
import numpy as np
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@ -562,7 +563,7 @@ def main():
experiment_config["lr_scheduler_type"] = experiment_config["lr_scheduler_type"].value
accelerator.init_trackers("translation_no_trainer", experiment_config)
-metric = load_metric("sacrebleu")
+metric = evaluate.load("sacrebleu")
def postprocess_text(preds, labels):
preds = [pred.strip() for pred in preds]