Add push_to_hub to no_trainer examples (#13659)

* Add push_to_hub to no_trainer examples

* Quality

* Document integration

* Roll out to other examples
Sylvain Gugger, 2021-09-21 13:13:30 -04:00, committed by GitHub
commit b7d264be0d (parent a722c301bf)
10 changed files with 319 additions and 27 deletions

examples/pytorch/README.md

@@ -74,6 +74,17 @@ line, 🤗 Trainer supports resuming from a checkpoint via `trainer.train(resume
 2. If `resume_from_checkpoint` is a path to a specific checkpoint, it will use that saved checkpoint folder to resume the training from.
 
+### Upload the trained/fine-tuned model to the Hub
+
+All the example scripts support automatic upload of your final model to the [Model Hub](https://huggingface.co/models) by adding a `--push_to_hub` argument. It will then create a repository named with your username followed by the name of the folder you are using as `output_dir`: for instance, `sgugger/test-mrpc` if your username is `sgugger` and you are working in the folder `~/tmp/test-mrpc`.
+
+To specify a given repository name, use the `--hub_model_id` argument. You will need to specify the whole repository name (including your username), for instance `--hub_model_id sgugger/finetuned-bert-mrpc`. To upload to an organization you are a member of, just use the name of that organization instead of your username: `--hub_model_id huggingface/finetuned-bert-mrpc`.
+
+A few notes on this integration:
+
+- You will need to be logged in to the Hugging Face website locally for this to work; the easiest way is to run `huggingface-cli login` and then type your username and password when prompted. You can also pass along your authentication token with the `--hub_token` argument.
+- The `output_dir` you pick will either need to be a new folder or a local clone of the distant repository you are using.
+
 ## Distributed training and mixed precision
 
 All the PyTorch scripts mentioned above work out of the box with distributed training and mixed precision, thanks to
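
As a quick end-to-end illustration of the documentation added above (the checkpoint, task, and repository names below are just placeholders, and `run_glue_no_trainer.py` stands in for any of the example scripts), a run that trains and uploads could look like:

```bash
# Log in once so the script can authenticate with the Hub
huggingface-cli login

# Train, then push the final model; without --hub_model_id the repository
# defaults to "<username>/<output_dir name>", e.g. "sgugger/test-mrpc".
python run_glue_no_trainer.py \
  --model_name_or_path bert-base-cased \
  --task_name mrpc \
  --output_dir ~/tmp/test-mrpc \
  --push_to_hub \
  --hub_model_id sgugger/finetuned-bert-mrpc
```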

examples/pytorch/language-modeling/run_clm_no_trainer.py

@@ -27,6 +27,7 @@ import logging
 import math
 import os
 import random
+from pathlib import Path
 
 import datasets
 import torch
@@ -36,6 +37,7 @@ from tqdm.auto import tqdm
 import transformers
 from accelerate import Accelerator, DistributedType
+from huggingface_hub import Repository
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -48,6 +50,7 @@ from transformers import (
     get_scheduler,
     set_seed,
 )
+from transformers.file_utils import get_full_repo_name
 from transformers.utils.versions import require_version
@@ -176,7 +179,11 @@ def parse_args():
     parser.add_argument(
         "--no_keep_linebreaks", action="store_true", help="Do not keep line breaks when using TXT files."
     )
+    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
+    parser.add_argument(
+        "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`."
+    )
+    parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.")
     args = parser.parse_args()
 
     # Sanity checks
@@ -190,8 +197,8 @@ def parse_args():
             extension = args.validation_file.split(".")[-1]
             assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, json or txt file."
 
-    if args.output_dir is not None:
-        os.makedirs(args.output_dir, exist_ok=True)
+    if args.push_to_hub:
+        assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed."
 
     return args
@@ -223,6 +230,18 @@ def main():
     if args.seed is not None:
         set_seed(args.seed)
 
+    # Handle the repository creation
+    if accelerator.is_main_process:
+        if args.push_to_hub:
+            if args.hub_model_id is None:
+                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
+            else:
+                repo_name = args.hub_model_id
+            repo = Repository(args.output_dir, clone_from=repo_name)
+        elif args.output_dir is not None:
+            os.makedirs(args.output_dir, exist_ok=True)
+    accelerator.wait_for_everyone()
+
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
     # (the dataset will be downloaded automatically from the datasets Hub).
@@ -480,10 +499,22 @@ def main():
         logger.info(f"epoch {epoch}: perplexity: {perplexity}")
 
+        if args.push_to_hub and epoch < args.num_train_epochs - 1:
+            accelerator.wait_for_everyone()
+            unwrapped_model = accelerator.unwrap_model(model)
+            unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+            if accelerator.is_main_process:
+                tokenizer.save_pretrained(args.output_dir)
+                repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
+
     if args.output_dir is not None:
         accelerator.wait_for_everyone()
         unwrapped_model = accelerator.unwrap_model(model)
         unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+        if accelerator.is_main_process:
+            tokenizer.save_pretrained(args.output_dir)
+            if args.push_to_hub:
+                repo.push_to_hub(commit_message="End of training")
 
 if __name__ == "__main__":
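
The repository-creation block added above reappears in every script in this commit. In isolation, the two helpers it leans on fit together as in this minimal sketch (assuming a prior `huggingface-cli login`; `my-model` is a hypothetical folder name):

```python
from pathlib import Path

from huggingface_hub import Repository
from transformers.file_utils import get_full_repo_name

output_dir = "my-model"  # hypothetical local folder; its name doubles as the repo name

# Resolve the bare folder name to "<username>/<name>" via the stored login token
repo_name = get_full_repo_name(Path(output_dir).name)

# Clone (or reuse) the matching Hub repo into output_dir, so files saved there
# can later be committed and pushed
repo = Repository(output_dir, clone_from=repo_name)

# After model/tokenizer files have been written into output_dir:
repo.push_to_hub(commit_message="Training in progress", blocking=False)  # upload in the background
```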

examples/pytorch/language-modeling/run_mlm_no_trainer.py

@@ -27,6 +27,7 @@ import logging
 import math
 import os
 import random
+from pathlib import Path
 
 import datasets
 import torch
@@ -36,6 +37,7 @@ from tqdm.auto import tqdm
 import transformers
 from accelerate import Accelerator, DistributedType
+from huggingface_hub import Repository
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -48,6 +50,7 @@ from transformers import (
     get_scheduler,
     set_seed,
 )
+from transformers.file_utils import get_full_repo_name
 from transformers.utils.versions import require_version
@@ -185,7 +188,11 @@ def parse_args():
     parser.add_argument(
         "--mlm_probability", type=float, default=0.15, help="Ratio of tokens to mask for masked language modeling loss"
     )
+    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
+    parser.add_argument(
+        "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`."
+    )
+    parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.")
     args = parser.parse_args()
 
     # Sanity checks
@@ -199,8 +206,8 @@ def parse_args():
             extension = args.validation_file.split(".")[-1]
             assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, json or txt file."
 
-    if args.output_dir is not None:
-        os.makedirs(args.output_dir, exist_ok=True)
+    if args.push_to_hub:
+        assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed."
 
     return args
@@ -232,6 +239,18 @@ def main():
     if args.seed is not None:
         set_seed(args.seed)
 
+    # Handle the repository creation
+    if accelerator.is_main_process:
+        if args.push_to_hub:
+            if args.hub_model_id is None:
+                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
+            else:
+                repo_name = args.hub_model_id
+            repo = Repository(args.output_dir, clone_from=repo_name)
+        elif args.output_dir is not None:
+            os.makedirs(args.output_dir, exist_ok=True)
+    accelerator.wait_for_everyone()
+
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
     # (the dataset will be downloaded automatically from the datasets Hub).
@@ -518,10 +537,22 @@ def main():
         logger.info(f"epoch {epoch}: perplexity: {perplexity}")
 
+        if args.push_to_hub and epoch < args.num_train_epochs - 1:
+            accelerator.wait_for_everyone()
+            unwrapped_model = accelerator.unwrap_model(model)
+            unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+            if accelerator.is_main_process:
+                tokenizer.save_pretrained(args.output_dir)
+                repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
+
     if args.output_dir is not None:
         accelerator.wait_for_everyone()
         unwrapped_model = accelerator.unwrap_model(model)
         unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+        if accelerator.is_main_process:
+            tokenizer.save_pretrained(args.output_dir)
+            if args.push_to_hub:
+                repo.push_to_hub(commit_message="End of training")
 
 if __name__ == "__main__":

examples/pytorch/multiple-choice/run_swag_no_trainer.py

@@ -24,6 +24,7 @@ import math
 import os
 import random
 from dataclasses import dataclass
+from pathlib import Path
 from typing import Optional, Union
 
 import datasets
@@ -34,6 +35,7 @@ from tqdm.auto import tqdm
 import transformers
 from accelerate import Accelerator
+from huggingface_hub import Repository
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -47,7 +49,7 @@ from transformers import (
     get_scheduler,
     set_seed,
 )
-from transformers.file_utils import PaddingStrategy
+from transformers.file_utils import PaddingStrategy, get_full_repo_name
 
 logger = logging.getLogger(__name__)
@@ -169,9 +171,15 @@ def parse_args():
         action="store_true",
         help="Activate debug mode and run training only with a subset of data.",
     )
+    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
+    parser.add_argument(
+        "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`."
+    )
+    parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.")
     args = parser.parse_args()
-    if args.output_dir is not None:
-        os.makedirs(args.output_dir, exist_ok=True)
+
+    if args.push_to_hub:
+        assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed."
+
     return args
@@ -260,6 +268,18 @@ def main():
     if args.seed is not None:
         set_seed(args.seed)
 
+    # Handle the repository creation
+    if accelerator.is_main_process:
+        if args.push_to_hub:
+            if args.hub_model_id is None:
+                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
+            else:
+                repo_name = args.hub_model_id
+            repo = Repository(args.output_dir, clone_from=repo_name)
+        elif args.output_dir is not None:
+            os.makedirs(args.output_dir, exist_ok=True)
+    accelerator.wait_for_everyone()
+
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
     # (the dataset will be downloaded automatically from the datasets Hub).
@@ -478,10 +498,22 @@ def main():
         eval_metric = metric.compute()
         accelerator.print(f"epoch {epoch}: {eval_metric}")
 
+        if args.push_to_hub and epoch < args.num_train_epochs - 1:
+            accelerator.wait_for_everyone()
+            unwrapped_model = accelerator.unwrap_model(model)
+            unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+            if accelerator.is_main_process:
+                tokenizer.save_pretrained(args.output_dir)
+                repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
+
     if args.output_dir is not None:
         accelerator.wait_for_everyone()
         unwrapped_model = accelerator.unwrap_model(model)
         unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+        if accelerator.is_main_process:
+            tokenizer.save_pretrained(args.output_dir)
+            if args.push_to_hub:
+                repo.push_to_hub(commit_message="End of training")
 
 if __name__ == "__main__":

examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py

@@ -23,6 +23,7 @@ import logging
 import math
 import os
 import random
+from pathlib import Path
 
 import datasets
 import numpy as np
@@ -33,6 +34,7 @@ from tqdm.auto import tqdm
 import transformers
 from accelerate import Accelerator
+from huggingface_hub import Repository
 from transformers import (
     AdamW,
     DataCollatorWithPadding,
@@ -45,6 +47,7 @@ from transformers import (
     get_scheduler,
     set_seed,
 )
+from transformers.file_utils import get_full_repo_name
 from transformers.utils import check_min_version
 from transformers.utils.versions import require_version
 from utils_qa import postprocess_qa_predictions_with_beam_search
@@ -203,7 +206,11 @@ def parse_args():
         default=None,
         help="For debugging purposes or quicker training, truncate the number of prediction examples to this",
     )
+    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
+    parser.add_argument(
+        "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`."
+    )
+    parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.")
     args = parser.parse_args()
 
     # Sanity checks
@@ -225,8 +232,8 @@ def parse_args():
             extension = args.test_file.split(".")[-1]
             assert extension in ["csv", "json"], "`test_file` should be a csv or a json file."
 
-    if args.output_dir is not None:
-        os.makedirs(args.output_dir, exist_ok=True)
+    if args.push_to_hub:
+        assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed."
 
     return args
@@ -258,6 +265,18 @@ def main():
     if args.seed is not None:
         set_seed(args.seed)
 
+    # Handle the repository creation
+    if accelerator.is_main_process:
+        if args.push_to_hub:
+            if args.hub_model_id is None:
+                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
+            else:
+                repo_name = args.hub_model_id
+            repo = Repository(args.output_dir, clone_from=repo_name)
+        elif args.output_dir is not None:
+            os.makedirs(args.output_dir, exist_ok=True)
+    accelerator.wait_for_everyone()
+
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
     # (the dataset will be downloaded automatically from the datasets Hub).
@@ -703,8 +722,15 @@ def main():
             if completed_steps >= args.max_train_steps:
                 break
 
+        if args.push_to_hub and epoch < args.num_train_epochs - 1:
+            accelerator.wait_for_everyone()
+            unwrapped_model = accelerator.unwrap_model(model)
+            unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+            if accelerator.is_main_process:
+                tokenizer.save_pretrained(args.output_dir)
+                repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
+
     # intialize all lists to collect the batches
     all_start_top_log_probs = []
     all_start_top_index = []
     all_end_top_log_probs = []
@@ -821,6 +847,10 @@ def main():
         accelerator.wait_for_everyone()
         unwrapped_model = accelerator.unwrap_model(model)
         unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+        if accelerator.is_main_process:
+            tokenizer.save_pretrained(args.output_dir)
+            if args.push_to_hub:
+                repo.push_to_hub(commit_message="End of training")
 
 if __name__ == "__main__":

examples/pytorch/question-answering/run_qa_no_trainer.py

@@ -23,6 +23,7 @@ import logging
 import math
 import os
 import random
+from pathlib import Path
 
 import datasets
 import numpy as np
@@ -33,6 +34,7 @@ from tqdm.auto import tqdm
 import transformers
 from accelerate import Accelerator
+from huggingface_hub import Repository
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -47,6 +49,7 @@ from transformers import (
     get_scheduler,
     set_seed,
 )
+from transformers.file_utils import get_full_repo_name
 from transformers.utils import check_min_version
 from transformers.utils.versions import require_version
 from utils_qa import postprocess_qa_predictions
@@ -232,7 +235,11 @@ def parse_args():
         help="Model type to use if training from scratch.",
         choices=MODEL_TYPES,
     )
+    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
+    parser.add_argument(
+        "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`."
+    )
+    parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.")
     args = parser.parse_args()
 
     # Sanity checks
@@ -254,8 +261,8 @@ def parse_args():
             extension = args.test_file.split(".")[-1]
             assert extension in ["csv", "json"], "`test_file` should be a csv or a json file."
 
-    if args.output_dir is not None:
-        os.makedirs(args.output_dir, exist_ok=True)
+    if args.push_to_hub:
+        assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed."
 
     return args
@@ -287,6 +294,18 @@ def main():
     if args.seed is not None:
         set_seed(args.seed)
 
+    # Handle the repository creation
+    if accelerator.is_main_process:
+        if args.push_to_hub:
+            if args.hub_model_id is None:
+                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
+            else:
+                repo_name = args.hub_model_id
+            repo = Repository(args.output_dir, clone_from=repo_name)
+        elif args.output_dir is not None:
+            os.makedirs(args.output_dir, exist_ok=True)
+    accelerator.wait_for_everyone()
+
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
     # (the dataset will be downloaded automatically from the datasets Hub).
@@ -708,6 +727,14 @@ def main():
             if completed_steps >= args.max_train_steps:
                 break
 
+        if args.push_to_hub and epoch < args.num_train_epochs - 1:
+            accelerator.wait_for_everyone()
+            unwrapped_model = accelerator.unwrap_model(model)
+            unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+            if accelerator.is_main_process:
+                tokenizer.save_pretrained(args.output_dir)
+                repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
+
     # Evaluation
     logger.info("***** Running Evaluation *****")
     logger.info(f"  Num examples = {len(eval_dataset)}")
@@ -782,6 +809,10 @@ def main():
         accelerator.wait_for_everyone()
         unwrapped_model = accelerator.unwrap_model(model)
         unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+        if accelerator.is_main_process:
+            tokenizer.save_pretrained(args.output_dir)
+            if args.push_to_hub:
+                repo.push_to_hub(commit_message="End of training")
 
 if __name__ == "__main__":

examples/pytorch/summarization/run_summarization_no_trainer.py

@@ -23,6 +23,7 @@ import logging
 import math
 import os
 import random
+from pathlib import Path
 
 import datasets
 import nltk
@@ -35,6 +36,7 @@ from tqdm.auto import tqdm
 import transformers
 from accelerate import Accelerator
 from filelock import FileLock
+from huggingface_hub import Repository
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -47,7 +49,7 @@ from transformers import (
     get_scheduler,
     set_seed,
 )
-from transformers.file_utils import is_offline_mode
+from transformers.file_utils import get_full_repo_name, is_offline_mode
 from transformers.utils.versions import require_version
@@ -255,7 +257,11 @@ def parse_args():
         help="Model type to use if training from scratch.",
         choices=MODEL_TYPES,
     )
+    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
+    parser.add_argument(
+        "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`."
+    )
+    parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.")
     args = parser.parse_args()
 
     # Sanity checks
@@ -269,8 +275,8 @@ def parse_args():
             extension = args.validation_file.split(".")[-1]
             assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file."
 
-    if args.output_dir is not None:
-        os.makedirs(args.output_dir, exist_ok=True)
+    if args.push_to_hub:
+        assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed."
 
     return args
@@ -313,6 +319,18 @@ def main():
     if args.seed is not None:
         set_seed(args.seed)
 
+    # Handle the repository creation
+    if accelerator.is_main_process:
+        if args.push_to_hub:
+            if args.hub_model_id is None:
+                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
+            else:
+                repo_name = args.hub_model_id
+            repo = Repository(args.output_dir, clone_from=repo_name)
+        elif args.output_dir is not None:
+            os.makedirs(args.output_dir, exist_ok=True)
+    accelerator.wait_for_everyone()
+
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
    # (the dataset will be downloaded automatically from the datasets Hub).
@@ -576,10 +594,22 @@ def main():
         logger.info(result)
 
+        if args.push_to_hub and epoch < args.num_train_epochs - 1:
+            accelerator.wait_for_everyone()
+            unwrapped_model = accelerator.unwrap_model(model)
+            unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+            if accelerator.is_main_process:
+                tokenizer.save_pretrained(args.output_dir)
+                repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
+
     if args.output_dir is not None:
         accelerator.wait_for_everyone()
         unwrapped_model = accelerator.unwrap_model(model)
         unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+        if accelerator.is_main_process:
+            tokenizer.save_pretrained(args.output_dir)
+            if args.push_to_hub:
+                repo.push_to_hub(commit_message="End of training")
 
 if __name__ == "__main__":

examples/pytorch/text-classification/run_glue_no_trainer.py

@@ -18,6 +18,7 @@ import logging
 import math
 import os
 import random
+from pathlib import Path
 
 import datasets
 from datasets import load_dataset, load_metric
@@ -26,6 +27,7 @@ from tqdm.auto import tqdm
 import transformers
 from accelerate import Accelerator
+from huggingface_hub import Repository
 from transformers import (
     AdamW,
     AutoConfig,
@@ -38,6 +40,7 @@ from transformers import (
     get_scheduler,
     set_seed,
 )
+from transformers.file_utils import get_full_repo_name
 from transformers.utils.versions import require_version
@@ -142,6 +145,11 @@ def parse_args():
     )
     parser.add_argument("--output_dir", type=str, default=None, help="Where to store the final model.")
     parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.")
+    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
+    parser.add_argument(
+        "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`."
+    )
+    parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.")
     args = parser.parse_args()
 
     # Sanity checks
@@ -155,8 +163,8 @@ def parse_args():
             extension = args.validation_file.split(".")[-1]
             assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file."
 
-    if args.output_dir is not None:
-        os.makedirs(args.output_dir, exist_ok=True)
+    if args.push_to_hub:
+        assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed."
 
     return args
@@ -188,6 +196,18 @@ def main():
     if args.seed is not None:
         set_seed(args.seed)
 
+    # Handle the repository creation
+    if accelerator.is_main_process:
+        if args.push_to_hub:
+            if args.hub_model_id is None:
+                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
+            else:
+                repo_name = args.hub_model_id
+            repo = Repository(args.output_dir, clone_from=repo_name)
+        elif args.output_dir is not None:
+            os.makedirs(args.output_dir, exist_ok=True)
+    accelerator.wait_for_everyone()
+
     # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
     # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
@@ -426,10 +446,22 @@ def main():
         eval_metric = metric.compute()
         logger.info(f"epoch {epoch}: {eval_metric}")
 
+        if args.push_to_hub and epoch < args.num_train_epochs - 1:
+            accelerator.wait_for_everyone()
+            unwrapped_model = accelerator.unwrap_model(model)
+            unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+            if accelerator.is_main_process:
+                tokenizer.save_pretrained(args.output_dir)
+                repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
+
     if args.output_dir is not None:
         accelerator.wait_for_everyone()
         unwrapped_model = accelerator.unwrap_model(model)
         unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+        if accelerator.is_main_process:
+            tokenizer.save_pretrained(args.output_dir)
+            if args.push_to_hub:
+                repo.push_to_hub(commit_message="End of training")
 
     if args.task_name == "mnli":
         # Final evaluation on mismatched validation set

examples/pytorch/token-classification/run_ner_no_trainer.py

@@ -23,6 +23,7 @@ import logging
 import math
 import os
 import random
+from pathlib import Path
 
 import datasets
 import torch
@@ -32,6 +33,7 @@ from tqdm.auto import tqdm
 import transformers
 from accelerate import Accelerator
+from huggingface_hub import Repository
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -45,6 +47,7 @@ from transformers import (
     get_scheduler,
     set_seed,
 )
+from transformers.file_utils import get_full_repo_name
 from transformers.utils.versions import require_version
@@ -195,6 +198,11 @@ def parse_args():
         action="store_true",
         help="Activate debug mode and run training only with a subset of data.",
     )
+    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
+    parser.add_argument(
+        "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`."
+    )
+    parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.")
     args = parser.parse_args()
 
     # Sanity checks
@@ -208,8 +216,8 @@ def parse_args():
            extension = args.validation_file.split(".")[-1]
            assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file."
 
-    if args.output_dir is not None:
-        os.makedirs(args.output_dir, exist_ok=True)
+    if args.push_to_hub:
+        assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed."
 
     return args
@@ -241,6 +249,18 @@ def main():
     if args.seed is not None:
         set_seed(args.seed)
 
+    # Handle the repository creation
+    if accelerator.is_main_process:
+        if args.push_to_hub:
+            if args.hub_model_id is None:
+                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
+            else:
+                repo_name = args.hub_model_id
+            repo = Repository(args.output_dir, clone_from=repo_name)
+        elif args.output_dir is not None:
+            os.makedirs(args.output_dir, exist_ok=True)
+    accelerator.wait_for_everyone()
+
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets for token classification task available on the hub at https://huggingface.co/datasets/
     # (the dataset will be downloaded automatically from the datasets Hub).
@@ -552,10 +572,22 @@ def main():
         eval_metric = compute_metrics()
         accelerator.print(f"epoch {epoch}:", eval_metric)
 
+        if args.push_to_hub and epoch < args.num_train_epochs - 1:
+            accelerator.wait_for_everyone()
+            unwrapped_model = accelerator.unwrap_model(model)
+            unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+            if accelerator.is_main_process:
+                tokenizer.save_pretrained(args.output_dir)
+                repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
+
     if args.output_dir is not None:
         accelerator.wait_for_everyone()
         unwrapped_model = accelerator.unwrap_model(model)
         unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+        if accelerator.is_main_process:
+            tokenizer.save_pretrained(args.output_dir)
+            if args.push_to_hub:
+                repo.push_to_hub(commit_message="End of training")
 
 if __name__ == "__main__":

examples/pytorch/translation/run_translation_no_trainer.py

@@ -23,6 +23,7 @@ import logging
 import math
 import os
 import random
+from pathlib import Path
 
 import datasets
 import numpy as np
@@ -33,6 +34,7 @@ from tqdm.auto import tqdm
 import transformers
 from accelerate import Accelerator
+from huggingface_hub import Repository
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -48,6 +50,7 @@ from transformers import (
     get_scheduler,
     set_seed,
 )
+from transformers.file_utils import get_full_repo_name
 from transformers.utils.versions import require_version
@@ -235,7 +238,11 @@ def parse_args():
         help="Model type to use if training from scratch.",
         choices=MODEL_TYPES,
     )
+    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
+    parser.add_argument(
+        "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`."
+    )
+    parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.")
     args = parser.parse_args()
 
     # Sanity checks
@@ -250,8 +257,9 @@ def parse_args():
            extension = args.validation_file.split(".")[-1]
            assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file."
 
-    if args.output_dir is not None:
-        os.makedirs(args.output_dir, exist_ok=True)
+    if args.push_to_hub:
+        assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed."
+
     return args
@@ -284,6 +292,18 @@ def main():
     if args.seed is not None:
         set_seed(args.seed)
 
+    # Handle the repository creation
+    if accelerator.is_main_process:
+        if args.push_to_hub:
+            if args.hub_model_id is None:
+                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
+            else:
+                repo_name = args.hub_model_id
+            repo = Repository(args.output_dir, clone_from=repo_name)
+        elif args.output_dir is not None:
+            os.makedirs(args.output_dir, exist_ok=True)
+    accelerator.wait_for_everyone()
+
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
     # (the dataset will be downloaded automatically from the datasets Hub).
@@ -553,10 +573,22 @@ def main():
         eval_metric = metric.compute()
         logger.info({"bleu": eval_metric["score"]})
 
+        if args.push_to_hub and epoch < args.num_train_epochs - 1:
+            accelerator.wait_for_everyone()
+            unwrapped_model = accelerator.unwrap_model(model)
+            unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+            if accelerator.is_main_process:
+                tokenizer.save_pretrained(args.output_dir)
+                repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
+
     if args.output_dir is not None:
         accelerator.wait_for_everyone()
         unwrapped_model = accelerator.unwrap_model(model)
         unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
+        if accelerator.is_main_process:
+            tokenizer.save_pretrained(args.output_dir)
+            if args.push_to_hub:
+                repo.push_to_hub(commit_message="End of training")
 
 if __name__ == "__main__":
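
All nine scripts share the same save-and-push choreography: every process takes part in `wait_for_everyone` and the accelerator-aware save, only the main process writes the tokenizer files and talks to git, and mid-training pushes use `blocking=False` so the upload does not stall the next epoch. Condensed into one hypothetical helper (with `accelerator`, `model`, `tokenizer`, and `repo` assumed to exist as in the scripts above):

```python
def save_and_push(accelerator, model, tokenizer, repo, output_dir, message, blocking=True):
    # Synchronize all processes before gathering and saving the weights
    accelerator.wait_for_everyone()
    unwrapped_model = accelerator.unwrap_model(model)
    unwrapped_model.save_pretrained(output_dir, save_function=accelerator.save)
    if accelerator.is_main_process:
        # Only one process should write the tokenizer files and run git
        tokenizer.save_pretrained(output_dir)
        repo.push_to_hub(commit_message=message, blocking=blocking)

# During training: save_and_push(..., message=f"Training in progress epoch {epoch}", blocking=False)
# At the end:      save_and_push(..., message="End of training")
```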