Use yaml to create metadata (#12185)

* Use yaml to create metadata

* Fix typo

* Remove pin
This commit is contained in:
Sylvain Gugger 2021-06-16 13:17:45 -04:00 committed by GitHub
parent 15ef0dc5c6
commit 255a17a089
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 52 additions and 33 deletions

View File

@ -117,6 +117,7 @@ _deps = [
"parameterized", "parameterized",
"protobuf", "protobuf",
"psutil", "psutil",
"pyyaml",
"pydantic", "pydantic",
"pytest", "pytest",
"pytest-sugar", "pytest-sugar",
@ -321,6 +322,7 @@ install_requires = [
deps["huggingface-hub"], deps["huggingface-hub"],
deps["numpy"], deps["numpy"],
deps["packaging"], # utilities from PyPA to e.g., compare versions deps["packaging"], # utilities from PyPA to e.g., compare versions
deps["pyyaml"], # used for the model cards metadata
deps["regex"], # for OpenAI GPT deps["regex"], # for OpenAI GPT
deps["requests"], # for downloading models over HTTPS deps["requests"], # for downloading models over HTTPS
deps["sacremoses"], # for XLM deps["sacremoses"], # for XLM

View File

@ -34,6 +34,7 @@ deps = {
"parameterized": "parameterized", "parameterized": "parameterized",
"protobuf": "protobuf", "protobuf": "protobuf",
"psutil": "psutil", "psutil": "psutil",
"pyyaml": "pyyaml",
"pydantic": "pydantic", "pydantic": "pydantic",
"pytest": "pytest", "pytest": "pytest",
"pytest-sugar": "pytest-sugar", "pytest-sugar": "pytest-sugar",

View File

@ -24,6 +24,7 @@ from pathlib import Path
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Optional, Union
import requests import requests
import yaml
from huggingface_hub import HfApi from huggingface_hub import HfApi
from . import __version__ from . import __version__
@ -307,15 +308,15 @@ def _listify(obj):
return obj return obj
def _list_possibilities(name, tags): def _insert_values_as_list(metadata, name, values):
if tags is None: if values is None:
return "" return metadata
if isinstance(tags, str): if isinstance(values, str):
tags = [tags] values = [values]
if len(tags) == 0: if len(values) == 0:
return "" return metadata
name_tags = [f"- {tag}" for tag in tags] metadata[name] = values
return f"{name}:\n" + "\n".join(name_tags) + "\n" return metadata
def infer_metric_tags_from_eval_results(eval_results): def infer_metric_tags_from_eval_results(eval_results):
@ -330,6 +331,13 @@ def infer_metric_tags_from_eval_results(eval_results):
return result return result
def _insert_value(metadata, name, value):
if value is None:
return metadata
metadata[name] = value
return metadata
def is_hf_dataset(dataset): def is_hf_dataset(dataset):
if not is_datasets_available(): if not is_datasets_available():
return False return False
@ -381,7 +389,7 @@ class TrainingSummary:
pass pass
def create_model_index(self, metric_mapping): def create_model_index(self, metric_mapping):
model_index = f"model-index:\n- name: {self.model_name}\n" model_index = {"name": self.model_name}
# Dataset mapping tag -> name # Dataset mapping tag -> name
dataset_names = _listify(self.dataset) dataset_names = _listify(self.dataset)
@ -402,42 +410,50 @@ class TrainingSummary:
task_mapping = {None: None} task_mapping = {None: None}
if len(dataset_mapping) == 0: if len(dataset_mapping) == 0:
dataset_mapping = {None: None} dataset_mapping = {None: None}
model_index["results"] = []
# One entry per dataset and per task
all_possibilities = [(task_tag, ds_tag) for task_tag in task_mapping for ds_tag in dataset_mapping] all_possibilities = [(task_tag, ds_tag) for task_tag in task_mapping for ds_tag in dataset_mapping]
model_index += " results:\n"
for task_tag, ds_tag in all_possibilities: for task_tag, ds_tag in all_possibilities:
result = "" result = {}
if task_tag is not None: if task_tag is not None:
result += f" - task:\n name: {task_mapping[task_tag]}\n type: {task_tag}\n" result["task"] = {"name": task_mapping[task_tag], "type": task_tag}
if ds_tag is not None: if ds_tag is not None:
prefix = " - " if task_tag is None else " " result["dataset"] = {"name": dataset_mapping[ds_tag], "type": ds_tag}
result += f"{prefix}dataset:\n name: {dataset_mapping[ds_tag]}\n type: {ds_tag}\n"
if dataset_arg_mapping[ds_tag] is not None: if dataset_arg_mapping[ds_tag] is not None:
result += f" args: {dataset_arg_mapping[ds_tag]}\n" result["dataset"]["args"] = dataset_arg_mapping[ds_tag]
if len(metric_mapping) > 0: if len(metric_mapping) > 0:
result += " metrics:\n"
for metric_tag, metric_name in metric_mapping.items(): for metric_tag, metric_name in metric_mapping.items():
value = self.eval_results[metric_name] result["metric"] = {
result += f" - name: {metric_name}\n type: {metric_tag}\n value: {value}\n" "name": metric_name,
"type": metric_tag,
"value": self.eval_results[metric_name],
}
model_index += result model_index["results"].append(result)
return model_index return [model_index]
def create_metadata(self):
    """
    Build the model card metadata as a dictionary, ready to be serialized to YAML.

    The `language`, `license`, `tags`, `datasets` and `metrics` entries are only added when the
    corresponding value is set; the model index entry is always added.

    Returns:
        dict: The metadata, with keys in the order they should appear in the model card header.
    """
    metric_mapping = infer_metric_tags_from_eval_results(self.eval_results)

    metadata = {}
    metadata = _insert_values_as_list(metadata, "language", self.language)
    metadata = _insert_value(metadata, "license", self.license)
    metadata = _insert_values_as_list(metadata, "tags", self.tags)
    metadata = _insert_values_as_list(metadata, "datasets", self.dataset_tags)
    metadata = _insert_values_as_list(metadata, "metrics", list(metric_mapping.keys()))
    # The key is spelled with a hyphen in the model card metadata ("model-index"), as the former
    # string-based implementation emitted it; "model_index" would not be recognized.
    metadata["model-index"] = self.create_model_index(metric_mapping)

    return metadata
def to_model_card(self): def to_model_card(self):
model_card = "" model_card = ""
metric_mapping = infer_metric_tags_from_eval_results(self.eval_results) metadata = yaml.dump(self.create_metadata(), sort_keys=False)
# Metadata
metadata = ""
metadata += _list_possibilities("language", self.language)
if self.license is not None:
metadata += f"license: {self.license}\n"
metadata += _list_possibilities("tags", self.tags)
metadata += _list_possibilities("datasets", self.dataset_tags)
metadata += _list_possibilities("metrics", list(metric_mapping.keys()))
metadata += "\n" + self.create_model_index(metric_mapping)
if len(metadata) > 0: if len(metadata) > 0:
model_card = f"---\n{metadata}---\n" model_card = f"---\n{metadata}---\n"