Migrate metric to Evaluate in PyTorch examples (#18369)
* Migrate metric to Evaluate in PyTorch examples
* Remove unused imports
This commit is contained in:
parent 25ec12eaf7
commit 1f84399171
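Every hunk below makes the same two-step change: drop `load_metric` from the `datasets` imports and load the metric through the standalone `evaluate` package instead. A minimal before/after sketch of the pattern, with "accuracy" standing in for whichever metric a given script loads:

# Before: metrics were loaded through the datasets package.
# from datasets import load_metric
# metric = load_metric("accuracy")

# After: metrics live in the standalone evaluate package.
import evaluate

metric = evaluate.load("accuracy")

# The compute interface is unchanged, so the compute_metrics
# functions passed to Trainer keep working as-is.
result = metric.compute(predictions=[0, 1, 1], references=[0, 1, 1])
print(result)  # {'accuracy': 1.0}
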
@@ -23,3 +23,4 @@ torchvision
 jiwer
 librosa
 torch < 1.12
+evaluate

@@ -26,6 +26,7 @@ import datasets
 import numpy as np
 from datasets import DatasetDict, load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -315,7 +316,7 @@ def main():
         id2label[str(i)] = label
 
     # Load the accuracy metric from the datasets package
-    metric = datasets.load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with
     # `predictions` and `label_ids` fields) and has to return a dictionary string to float.

@@ -19,7 +19,6 @@ import sys
 from dataclasses import dataclass, field
 from typing import Optional
 
-import datasets
 import numpy as np
 import torch
 from datasets import load_dataset
@@ -34,6 +33,7 @@ from torchvision.transforms import (
     ToTensor,
 )
 
+import evaluate
 import transformers
 from transformers import (
     MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
@@ -252,7 +252,7 @@ def main():
         id2label[str(i)] = label
 
     # Load the accuracy metric from the datasets package
-    metric = datasets.load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.

@@ -22,7 +22,7 @@ from pathlib import Path
 
 import datasets
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from torchvision.transforms import (
     CenterCrop,
@@ -35,6 +35,7 @@ from torchvision.transforms import (
 )
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -415,7 +416,7 @@ def main():
         accelerator.init_trackers("image_classification_no_trainer", experiment_config)
 
     # Get the metric function
-    metric = load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

@@ -30,8 +30,9 @@ from itertools import chain
 from typing import Optional
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     CONFIG_MAPPING,
@@ -492,7 +493,7 @@ def main():
                 logits = logits[0]
             return logits.argmax(dim=-1)
 
-        metric = load_metric("accuracy")
+        metric = evaluate.load("accuracy")
 
         def compute_metrics(eval_preds):
             preds, labels = eval_preds

@@ -30,8 +30,9 @@ from itertools import chain
 from typing import Optional
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     CONFIG_MAPPING,
@@ -515,7 +516,7 @@ def main():
                 logits = logits[0]
             return logits.argmax(dim=-1)
 
-        metric = load_metric("accuracy")
+        metric = evaluate.load("accuracy")
 
         def compute_metrics(eval_preds):
             preds, labels = eval_preds

@@ -31,10 +31,11 @@ from typing import Optional, Union
 
 import datasets
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -514,7 +515,7 @@ def main():
         accelerator.init_trackers("swag_no_trainer", experiment_config)
 
     # Metrics
-    metric = load_metric("accuracy")
+    metric = evaluate.load("accuracy")
 
     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
 from typing import Optional
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from trainer_qa import QuestionAnsweringTrainer
 from transformers import (
@@ -593,7 +594,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)
 
-    metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
 
     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)

@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
 from typing import Optional
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from trainer_qa import QuestionAnsweringTrainer
 from transformers import (
@@ -625,7 +626,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)
 
-    metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
 
     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)

@@ -29,10 +29,11 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -680,7 +681,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)
 
-    metric = load_metric("squad_v2" if args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if args.version_2_with_negative else "squad")
 
     def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
         """

@@ -29,10 +29,11 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -696,7 +697,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)
 
-    metric = load_metric("squad_v2" if args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if args.version_2_with_negative else "squad")
 
     # Create and fill numpy array of size len_of_validation_data * max_length_of_output_tensor
     def create_and_fill_np_array(start_or_end_logits, dataset, max_len):

@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
 from typing import List, Optional, Tuple
 
 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from trainer_seq2seq_qa import QuestionAnsweringSeq2SeqTrainer
 from transformers import (
@@ -581,7 +582,7 @@ def main():
         pad_to_multiple_of=8 if training_args.fp16 else None,
     )
 
-    metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
 
     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)

@@ -21,7 +21,6 @@ import sys
 from dataclasses import dataclass, field
 from typing import Optional
 
-import datasets
 import numpy as np
 import torch
 from datasets import load_dataset
@@ -30,6 +29,7 @@ from torch import nn
 from torchvision import transforms
 from torchvision.transforms import functional
 
+import evaluate
 import transformers
 from huggingface_hub import hf_hub_download
 from transformers import (
@@ -337,7 +337,7 @@ def main():
         label2id = {v: str(k) for k, v in id2label.items()}
 
     # Load the mean IoU metric from the datasets package
-    metric = datasets.load_metric("mean_iou")
+    metric = evaluate.load("mean_iou")
 
     # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.

@@ -24,13 +24,14 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from PIL import Image
 from torch.utils.data import DataLoader
 from torchvision import transforms
 from torchvision.transforms import functional
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -500,7 +501,7 @@ def main():
         args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
 
     # Instantiate metric
-    metric = load_metric("mean_iou")
+    metric = evaluate.load("mean_iou")
 
     # We need to initialize the trackers we use, and also store our configuration.
     # We initialize the trackers only on main process because `accelerator.log`

@@ -28,8 +28,9 @@ from typing import Dict, List, Optional, Union
 import datasets
 import numpy as np
 import torch
-from datasets import DatasetDict, load_dataset, load_metric
+from datasets import DatasetDict, load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -643,7 +644,7 @@ def main():
     # instantiate a data collator and the trainer
 
     # Define evaluation metrics during training, *i.e.* word error rate, character error rate
-    eval_metrics = {metric: load_metric(metric) for metric in data_args.eval_metrics}
+    eval_metrics = {metric: evaluate.load(metric) for metric in data_args.eval_metrics}
 
     # for large datasets it is advised to run the preprocessing on a
     # single machine first with ``args.preprocessing_only`` since there will mostly likely

@@ -27,8 +27,9 @@ from typing import Any, Dict, List, Optional, Union
 
 import datasets
 import torch
-from datasets import DatasetDict, load_dataset, load_metric
+from datasets import DatasetDict, load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -425,7 +426,7 @@ def main():
         return
 
     # 8. Load Metric
-    metric = load_metric("wer")
+    metric = evaluate.load("wer")
 
     def compute_metrics(pred):
         pred_ids = pred.predictions

@@ -27,8 +27,9 @@ from typing import Optional
 import datasets
 import nltk  # Here to have a nice missing dependency error message early on
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from filelock import FileLock
 from transformers import (
@@ -598,7 +599,7 @@ def main():
     )
 
     # Metric
-    metric = load_metric("rouge")
+    metric = evaluate.load("rouge")
 
     def postprocess_text(preds, labels):
         preds = [pred.strip() for pred in preds]

@@ -30,10 +30,11 @@ import datasets
 import nltk
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -583,7 +584,7 @@ def main():
         accelerator.init_trackers("summarization_no_trainer", experiment_config)
 
     # Metric
-    metric = load_metric("rouge")
+    metric = evaluate.load("rouge")
 
     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

@@ -25,8 +25,9 @@ from typing import Optional
 
 import datasets
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -480,9 +481,9 @@ def main():
 
     # Get the metric function
     if data_args.task_name is not None:
-        metric = load_metric("glue", data_args.task_name)
+        metric = evaluate.load("glue", data_args.task_name)
     else:
-        metric = load_metric("accuracy")
+        metric = evaluate.load("accuracy")
 
     # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.

@@ -23,10 +23,11 @@ from pathlib import Path
 
 import datasets
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -466,9 +467,9 @@ def main():
 
     # Get the metric function
    if args.task_name is not None:
-        metric = load_metric("glue", args.task_name)
+        metric = evaluate.load("glue", args.task_name)
     else:
-        metric = load_metric("accuracy")
+        metric = evaluate.load("accuracy")
 
     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

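For the no_trainer scripts above, the swap is equally drop-in because `evaluate` metrics keep the incremental `add_batch`/`compute` interface used inside the Accelerate evaluation loops. A small self-contained sketch (the toy batches here are illustrative, not taken from any script):

import evaluate

metric = evaluate.load("accuracy")

# Stand-in batches; the real loops feed gathered model predictions.
batches = [([0, 1], [0, 1]), ([1, 1], [0, 1])]

for preds, refs in batches:
    # Accumulate one batch at a time, as the eval loops do.
    metric.add_batch(predictions=preds, references=refs)

# compute() consumes the accumulated batches and resets the metric.
print(metric.compute())  # {'accuracy': 0.75}
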
@@ -26,8 +26,9 @@ from typing import Optional
 
 import datasets
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -349,7 +350,7 @@ def main():
     )
 
     # Get the metric function
-    metric = load_metric("xnli")
+    metric = evaluate.load("xnli")
 
     # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.

@@ -27,8 +27,9 @@ from typing import Optional
 
 import datasets
 import numpy as np
-from datasets import ClassLabel, load_dataset, load_metric
+from datasets import ClassLabel, load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -504,7 +505,7 @@ def main():
     data_collator = DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None)
 
     # Metrics
-    metric = load_metric("seqeval")
+    metric = evaluate.load("seqeval")
 
     def compute_metrics(p):
         predictions, labels = p

@@ -28,10 +28,11 @@ from pathlib import Path
 
 import datasets
 import torch
-from datasets import ClassLabel, load_dataset, load_metric
+from datasets import ClassLabel, load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -580,7 +581,7 @@ def main():
         accelerator.init_trackers("ner_no_trainer", experiment_config)
 
     # Metrics
-    metric = load_metric("seqeval")
+    metric = evaluate.load("seqeval")
 
     def get_labels(predictions, references):
         # Transform predictions and references tensos to numpy arrays

@@ -26,8 +26,9 @@ from typing import Optional
 
 import datasets
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 
+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -522,7 +523,7 @@ def main():
     )
 
     # Metric
-    metric = load_metric("sacrebleu")
+    metric = evaluate.load("sacrebleu")
 
     def postprocess_text(preds, labels):
         preds = [pred.strip() for pred in preds]

@@ -29,10 +29,11 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -562,7 +563,7 @@ def main():
         experiment_config["lr_scheduler_type"] = experiment_config["lr_scheduler_type"].value
         accelerator.init_trackers("translation_no_trainer", experiment_config)
 
-    metric = load_metric("sacrebleu")
+    metric = evaluate.load("sacrebleu")
 
     def postprocess_text(preds, labels):
         preds = [pred.strip() for pred in preds]