From 05e72aa0c4bf6d02345727334499f120ecb73254 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Wed, 18 Jan 2023 17:14:00 +0100
Subject: [PATCH] Adapt repository creation to latest hf_hub (#21158)

* Adapt repository creation to latest hf_hub

* Update all examples

* Fix other tests, add Flax examples

* Address review comments
---
 .../run_image_captioning_flax.py               |  5 +++--
 .../language-modeling/run_bart_dlm_flax.py     |  5 +++--
 .../flax/language-modeling/run_clm_flax.py     |  5 +++--
 .../flax/language-modeling/run_mlm_flax.py     |  5 +++--
 .../flax/language-modeling/run_t5_mlm_flax.py  |  5 +++--
 examples/flax/question-answering/run_qa.py     |  5 +++--
 .../summarization/run_summarization_flax.py    |  5 +++--
 .../flax/text-classification/run_flax_glue.py  |  5 +++--
 .../flax/token-classification/run_flax_ner.py  |  5 +++--
 .../flax/vision/run_image_classification.py    |  5 +++--
 .../run_image_classification_no_trainer.py     |  5 +++--
 .../language-modeling/run_clm_no_trainer.py    |  5 +++--
 .../language-modeling/run_mlm_no_trainer.py    |  5 +++--
 .../multiple-choice/run_swag_no_trainer.py     |  5 +++--
 .../run_qa_beam_search_no_trainer.py           |  5 +++--
 .../question-answering/run_qa_no_trainer.py    |  5 +++--
 .../run_semantic_segmentation_no_trainer.py    |  5 +++--
 .../run_wav2vec2_pretraining_no_trainer.py     |  5 +++--
 .../run_summarization_no_trainer.py            |  5 +++--
 .../text-classification/run_glue_no_trainer.py |  5 +++--
 .../token-classification/run_ner_no_trainer.py |  5 +++--
 .../translation/run_translation_no_trainer.py  |  5 +++--
 setup.py                                       |  2 +-
 src/transformers/dependency_versions_table.py  |  2 +-
 src/transformers/keras_callbacks.py            |  6 +-----
 src/transformers/trainer.py                    | 18 +++++-------------
 tests/models/auto/test_processor_auto.py       |  5 +++--
 tests/pipelines/test_pipelines_common.py       |  5 +++--
 tests/trainer/test_trainer.py                  |  4 ++--
 utils/update_metadata.py                       |  4 +---
 30 files changed, 83 insertions(+), 73 deletions(-)

diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py
index 1258eba49f..e460984686 100644
--- a/examples/flax/image-captioning/run_image_captioning_flax.py
+++ b/examples/flax/image-captioning/run_image_captioning_flax.py
@@ -45,7 +45,7 @@ from flax import jax_utils, traverse_util
 from flax.jax_utils import unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     AutoFeatureExtractor,
     AutoTokenizer,
@@ -430,7 +430,8 @@ def main():
             )
         else:
             repo_name = training_args.hub_model_id
-        repo = Repository(training_args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
+        repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
 
     # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
diff --git a/examples/flax/language-modeling/run_bart_dlm_flax.py b/examples/flax/language-modeling/run_bart_dlm_flax.py
index 6872e59345..2b8d07539e 100644
--- a/examples/flax/language-modeling/run_bart_dlm_flax.py
+++ b/examples/flax/language-modeling/run_bart_dlm_flax.py
@@ -45,7 +45,7 @@ from flax import jax_utils, traverse_util
 from flax.jax_utils import pad_shard_unpad
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     CONFIG_MAPPING,
     FLAX_MODEL_FOR_MASKED_LM_MAPPING,
@@ -502,7 +502,8 @@ def main():
             )
         else:
             repo_name = training_args.hub_model_id
-        repo = Repository(training_args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
+        repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
 
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
diff --git a/examples/flax/language-modeling/run_clm_flax.py b/examples/flax/language-modeling/run_clm_flax.py
index 7e0d1010c1..0d516878bc 100755
--- a/examples/flax/language-modeling/run_clm_flax.py
+++ b/examples/flax/language-modeling/run_clm_flax.py
@@ -46,7 +46,7 @@ from flax import jax_utils, traverse_util
 from flax.jax_utils import pad_shard_unpad, unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     CONFIG_MAPPING,
     FLAX_MODEL_FOR_CAUSAL_LM_MAPPING,
@@ -376,7 +376,8 @@ def main():
             )
         else:
             repo_name = training_args.hub_model_id
-        repo = Repository(training_args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
+        repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
 
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
diff --git a/examples/flax/language-modeling/run_mlm_flax.py b/examples/flax/language-modeling/run_mlm_flax.py
index 2383492aa4..a2b45b12a2 100755
--- a/examples/flax/language-modeling/run_mlm_flax.py
+++ b/examples/flax/language-modeling/run_mlm_flax.py
@@ -46,7 +46,7 @@ from flax import jax_utils, traverse_util
 from flax.jax_utils import pad_shard_unpad
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     CONFIG_MAPPING,
     FLAX_MODEL_FOR_MASKED_LM_MAPPING,
@@ -416,7 +416,8 @@ def main():
             )
         else:
             repo_name = training_args.hub_model_id
-        repo = Repository(training_args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
+        repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
 
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
diff --git a/examples/flax/language-modeling/run_t5_mlm_flax.py b/examples/flax/language-modeling/run_t5_mlm_flax.py
index ceae49c6b1..9fb7bdce0d 100755
--- a/examples/flax/language-modeling/run_t5_mlm_flax.py
+++ b/examples/flax/language-modeling/run_t5_mlm_flax.py
@@ -45,7 +45,7 @@ from flax import jax_utils, traverse_util
 from flax.jax_utils import pad_shard_unpad
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     CONFIG_MAPPING,
     FLAX_MODEL_FOR_MASKED_LM_MAPPING,
@@ -542,7 +542,8 @@ def main():
             )
         else:
             repo_name = training_args.hub_model_id
-        repo = Repository(training_args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
+        repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
 
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
diff --git a/examples/flax/question-answering/run_qa.py b/examples/flax/question-answering/run_qa.py
index 5b628ca9ae..0e84785de3 100644
--- a/examples/flax/question-answering/run_qa.py
+++ b/examples/flax/question-answering/run_qa.py
@@ -44,7 +44,7 @@ from flax import struct, traverse_util
 from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     AutoConfig,
     AutoTokenizer,
@@ -467,7 +467,8 @@ def main():
             )
         else:
             repo_name = training_args.hub_model_id
-        repo = Repository(training_args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
+        repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
 
     # region Load Data
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
diff --git a/examples/flax/summarization/run_summarization_flax.py b/examples/flax/summarization/run_summarization_flax.py
index fb3eb8d28c..361746e282 100644
--- a/examples/flax/summarization/run_summarization_flax.py
+++ b/examples/flax/summarization/run_summarization_flax.py
@@ -46,7 +46,7 @@ from flax import jax_utils, traverse_util
 from flax.jax_utils import pad_shard_unpad, unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     CONFIG_MAPPING,
     FLAX_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
@@ -450,7 +450,8 @@ def main():
             )
         else:
             repo_name = training_args.hub_model_id
-        repo = Repository(training_args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
+        repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
 
     # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
diff --git a/examples/flax/text-classification/run_flax_glue.py b/examples/flax/text-classification/run_flax_glue.py
index 04457bdd2a..ec9b7848f4 100755
--- a/examples/flax/text-classification/run_flax_glue.py
+++ b/examples/flax/text-classification/run_flax_glue.py
@@ -39,7 +39,7 @@ from flax import struct, traverse_util
 from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     AutoConfig,
     AutoTokenizer,
@@ -350,7 +350,8 @@ def main():
             )
         else:
             repo_name = training_args.hub_model_id
-        repo = Repository(training_args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
+        repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
 
     # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
     # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
diff --git a/examples/flax/token-classification/run_flax_ner.py b/examples/flax/token-classification/run_flax_ner.py
index 7224b5915e..78c5f9090e 100644
--- a/examples/flax/token-classification/run_flax_ner.py
+++ b/examples/flax/token-classification/run_flax_ner.py
@@ -41,7 +41,7 @@ from flax import struct, traverse_util
 from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     AutoConfig,
     AutoTokenizer,
@@ -406,7 +406,8 @@ def main():
             )
         else:
             repo_name = training_args.hub_model_id
-        repo = Repository(training_args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
+        repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
 
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets for token classification task available on the hub at https://huggingface.co/datasets/
diff --git a/examples/flax/vision/run_image_classification.py b/examples/flax/vision/run_image_classification.py
index 22065438d2..33a277fa4f 100644
--- a/examples/flax/vision/run_image_classification.py
+++ b/examples/flax/vision/run_image_classification.py
@@ -43,7 +43,7 @@ from flax import jax_utils
 from flax.jax_utils import pad_shard_unpad, unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     CONFIG_MAPPING,
     FLAX_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
@@ -298,7 +298,8 @@ def main():
             )
         else:
             repo_name = training_args.hub_model_id
-        repo = Repository(training_args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
+        repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
 
     # Initialize datasets and pre-processing transforms
     # We use torchvision here for faster pre-processing
diff --git a/examples/pytorch/image-classification/run_image_classification_no_trainer.py b/examples/pytorch/image-classification/run_image_classification_no_trainer.py
index 037201f16f..dab195e428 100644
--- a/examples/pytorch/image-classification/run_image_classification_no_trainer.py
+++ b/examples/pytorch/image-classification/run_image_classification_no_trainer.py
@@ -40,7 +40,7 @@ import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     AutoConfig,
     AutoFeatureExtractor,
@@ -246,7 +246,8 @@ def main():
             repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
         else:
             repo_name = args.hub_model_id
-        repo = Repository(args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=args.hub_token)
+        repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
 
         with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
             if "step_*" not in gitignore:
diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py
index 0524ca83f4..2bb59ccc35 100755
--- a/examples/pytorch/language-modeling/run_clm_no_trainer.py
+++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py
@@ -41,7 +41,7 @@ import transformers
 from accelerate import Accelerator, DistributedType
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -282,7 +282,8 @@ def main():
             repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
         else:
             repo_name = args.hub_model_id
-        repo = Repository(args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=args.hub_token)
+        repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
 
         with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
             if "step_*" not in gitignore:
diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py
index f7759cb26f..07e219ea16 100755
--- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py
+++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py
@@ -41,7 +41,7 @@ import transformers
 from accelerate import Accelerator, DistributedType
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -291,7 +291,8 @@ def main():
             repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
         else:
             repo_name = args.hub_model_id
-        repo = Repository(args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=args.hub_token)
+        repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
 
         with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
             if "step_*" not in gitignore:
diff --git a/examples/pytorch/multiple-choice/run_swag_no_trainer.py b/examples/pytorch/multiple-choice/run_swag_no_trainer.py
index 8a971821f7..6ffeb7bcdf 100755
--- a/examples/pytorch/multiple-choice/run_swag_no_trainer.py
+++ b/examples/pytorch/multiple-choice/run_swag_no_trainer.py
@@ -40,7 +40,7 @@ import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -317,7 +317,8 @@ def main():
             repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
         else:
             repo_name = args.hub_model_id
-        repo = Repository(args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=args.hub_token)
+        repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
 
         with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
             if "step_*" not in gitignore:
diff --git a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
index 1576986305..7a81e527da 100644
--- a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
+++ b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
@@ -38,7 +38,7 @@ import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     AdamW,
     DataCollatorWithPadding,
@@ -332,7 +332,8 @@ def main():
             repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
         else:
             repo_name = args.hub_model_id
-        repo = Repository(args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=args.hub_token)
+        repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
 
         with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
             if "step_*" not in gitignore:
diff --git a/examples/pytorch/question-answering/run_qa_no_trainer.py b/examples/pytorch/question-answering/run_qa_no_trainer.py
index 51dda97f7a..b1bfaf45f0 100755
--- a/examples/pytorch/question-answering/run_qa_no_trainer.py
+++ b/examples/pytorch/question-answering/run_qa_no_trainer.py
@@ -38,7 +38,7 @@ import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -370,7 +370,8 @@ def main():
             repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
         else:
             repo_name = args.hub_model_id
-        repo = Repository(args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=args.hub_token)
+        repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
 
         with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
             if "step_*" not in gitignore:
diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
index f1ff9ad720..0a05230a40 100644
--- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
+++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
@@ -36,7 +36,7 @@ import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
-from huggingface_hub import Repository, hf_hub_download
+from huggingface_hub import Repository, create_repo, hf_hub_download
 from transformers import (
     AutoConfig,
     AutoFeatureExtractor,
@@ -354,7 +354,8 @@ def main():
             repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
         else:
             repo_name = args.hub_model_id
-        repo = Repository(args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=args.hub_token)
+        repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
 
         with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
             if "step_*" not in gitignore:
diff --git a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py
index c15a8b73f5..3b6acadec5 100755
--- a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py
+++ b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py
@@ -31,7 +31,7 @@ from tqdm.auto import tqdm
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     AdamW,
     SchedulerType,
@@ -422,7 +422,8 @@ def main():
             repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
         else:
             repo_name = args.hub_model_id
-        repo = Repository(args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=args.hub_token)
+        repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
     elif args.output_dir is not None:
         os.makedirs(args.output_dir, exist_ok=True)
     accelerator.wait_for_everyone()
diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py
index dae706a80b..b4bf969626 100644
--- a/examples/pytorch/summarization/run_summarization_no_trainer.py
+++ b/examples/pytorch/summarization/run_summarization_no_trainer.py
@@ -40,7 +40,7 @@ from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
 from filelock import FileLock
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -373,7 +373,8 @@ def main():
             repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
         else:
             repo_name = args.hub_model_id
-        repo = Repository(args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=args.hub_token)
+        repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
 
         with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
             if "step_*" not in gitignore:
diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py
index f6c9af68d2..88272b0387 100644
--- a/examples/pytorch/text-classification/run_glue_no_trainer.py
+++ b/examples/pytorch/text-classification/run_glue_no_trainer.py
@@ -32,7 +32,7 @@ import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     AutoConfig,
     AutoModelForSequenceClassification,
@@ -244,7 +244,8 @@ def main():
             repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
         else:
             repo_name = args.hub_model_id
-        repo = Repository(args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=args.hub_token)
+        repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
 
         with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
             if "step_*" not in gitignore:
diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py
index 746a1ba0ae..fe8e00524d 100755
--- a/examples/pytorch/token-classification/run_ner_no_trainer.py
+++ b/examples/pytorch/token-classification/run_ner_no_trainer.py
@@ -37,7 +37,7 @@ import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -298,7 +298,8 @@ def main():
             repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
         else:
             repo_name = args.hub_model_id
-        repo = Repository(args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=args.hub_token)
+        repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
 
         with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
             if "step_*" not in gitignore:
diff --git a/examples/pytorch/translation/run_translation_no_trainer.py b/examples/pytorch/translation/run_translation_no_trainer.py
index b245c73408..0da14bab44 100644
--- a/examples/pytorch/translation/run_translation_no_trainer.py
+++ b/examples/pytorch/translation/run_translation_no_trainer.py
@@ -38,7 +38,7 @@ import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 from transformers import (
     CONFIG_MAPPING,
     MODEL_MAPPING,
@@ -345,7 +345,8 @@ def main():
             repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
         else:
             repo_name = args.hub_model_id
-        repo = Repository(args.output_dir, clone_from=repo_name)
+        create_repo(repo_name, exist_ok=True, token=args.hub_token)
+        repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
 
         with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
             if "step_*" not in gitignore:
diff --git a/setup.py b/setup.py
index aa7be0f6cb..cb65234edf 100644
--- a/setup.py
+++ b/setup.py
@@ -117,7 +117,7 @@ _deps = [
     "fugashi>=1.0",
     "GitPython<3.1.19",
     "hf-doc-builder>=0.3.0",
-    "huggingface-hub>=0.10.0,<1.0",
+    "huggingface-hub>=0.11.0,<1.0",
     "importlib_metadata",
     "ipadic>=1.0.0,<2.0",
     "isort>=5.5.4",
diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py
index b81734043f..2bdceb926a 100644
--- a/src/transformers/dependency_versions_table.py
+++ b/src/transformers/dependency_versions_table.py
@@ -23,7 +23,7 @@ deps = {
    "fugashi": "fugashi>=1.0",
    "GitPython": "GitPython<3.1.19",
    "hf-doc-builder": "hf-doc-builder>=0.3.0",
-    "huggingface-hub": "huggingface-hub>=0.10.0,<1.0",
+    "huggingface-hub": "huggingface-hub>=0.11.0,<1.0",
    "importlib_metadata": "importlib_metadata",
    "ipadic": "ipadic>=1.0.0,<2.0",
    "isort": "isort>=5.5.4",
diff --git a/src/transformers/keras_callbacks.py b/src/transformers/keras_callbacks.py
index 7128f348c3..f99bd738ea 100644
--- a/src/transformers/keras_callbacks.py
+++ b/src/transformers/keras_callbacks.py
@@ -340,11 +340,7 @@ class PushToHubCallback(Callback):
         self.output_dir = output_dir
         self.hub_model_id = hub_model_id
         create_repo(self.hub_model_id, exist_ok=True)
-        self.repo = Repository(
-            str(self.output_dir),
-            clone_from=self.hub_model_id,
-            use_auth_token=hub_token if hub_token else True,
-        )
+        self.repo = Repository(str(self.output_dir), clone_from=self.hub_model_id, token=hub_token)
 
         self.tokenizer = tokenizer
         self.last_job = None
diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 644b4f6715..3d60f80332 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -60,7 +60,7 @@ from torch import nn
 from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler
 from torch.utils.data.distributed import DistributedSampler
 
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 
 from . import __version__
 from .configuration_utils import PretrainedConfig
@@ -3315,7 +3315,6 @@ class Trainer:
         """
         if not self.is_world_process_zero():
             return
-        use_auth_token = True if self.args.hub_token is None else self.args.hub_token
         if self.args.hub_model_id is None:
             repo_name = Path(self.args.output_dir).absolute().name
         else:
@@ -3323,22 +3322,15 @@ class Trainer:
         if "/" not in repo_name:
             repo_name = get_full_repo_name(repo_name, token=self.args.hub_token)
 
+        # Make sure the repo exists.
+        create_repo(repo_name, token=self.args.hub_token, private=self.args.hub_private_repo, exist_ok=True)
         try:
-            self.repo = Repository(
-                self.args.output_dir,
-                clone_from=repo_name,
-                use_auth_token=use_auth_token,
-                private=self.args.hub_private_repo,
-            )
+            self.repo = Repository(self.args.output_dir, clone_from=repo_name, token=self.args.hub_token)
         except EnvironmentError:
             if self.args.overwrite_output_dir and at_init:
                 # Try again after wiping output_dir
                 shutil.rmtree(self.args.output_dir)
-                self.repo = Repository(
-                    self.args.output_dir,
-                    clone_from=repo_name,
-                    use_auth_token=use_auth_token,
-                )
+                self.repo = Repository(self.args.output_dir, clone_from=repo_name, token=self.args.hub_token)
             else:
                 raise
 
diff --git a/tests/models/auto/test_processor_auto.py b/tests/models/auto/test_processor_auto.py
index 91cd85a893..107e3611db 100644
--- a/tests/models/auto/test_processor_auto.py
+++ b/tests/models/auto/test_processor_auto.py
@@ -21,7 +21,7 @@ import unittest
 from pathlib import Path
 from shutil import copyfile
 
-from huggingface_hub import HfFolder, Repository, delete_repo, set_access_token
+from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_access_token
 from requests.exceptions import HTTPError
 
 from transformers import (
     CONFIG_MAPPING,
@@ -282,7 +282,8 @@ class ProcessorPushToHubTester(unittest.TestCase):
         processor = CustomProcessor(feature_extractor, tokenizer)
 
         with tempfile.TemporaryDirectory() as tmp_dir:
-            repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-processor", use_auth_token=self._token)
+            create_repo(f"{USER}/test-dynamic-processor", token=self._token)
+            repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-processor", token=self._token)
             processor.save_pretrained(tmp_dir)
 
             # This has added the proper auto_map field to the feature extractor config
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index f5e75381e3..8da9ed89a6 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -29,7 +29,7 @@ from unittest import skipIf
 import datasets
 import numpy as np
 
-from huggingface_hub import HfFolder, Repository, delete_repo, set_access_token
+from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_access_token
 from requests.exceptions import HTTPError
 
 from transformers import (
     FEATURE_EXTRACTOR_MAPPING,
@@ -1023,7 +1023,8 @@ class DynamicPipelineTester(unittest.TestCase):
         model = BertForSequenceClassification(config).eval()
 
         with tempfile.TemporaryDirectory() as tmp_dir:
-            repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-pipeline", use_auth_token=self._token)
create_repo(f"{USER}/test-dynamic-pipeline", token=self._token) + repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-pipeline", token=self._token) vocab_file = os.path.join(tmp_dir, "vocab.txt") with open(vocab_file, "w", encoding="utf-8") as vocab_writer: diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 8589607fdf..47f3ebbc44 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -2079,7 +2079,7 @@ class TrainerIntegrationWithHubTester(unittest.TestCase): time.sleep(0.5) with tempfile.TemporaryDirectory() as tmp_dir: - _ = Repository(tmp_dir, clone_from=f"{USER}/test-trainer-epoch", use_auth_token=self._token) + _ = Repository(tmp_dir, clone_from=f"{USER}/test-trainer-epoch", token=self._token) commits = self.get_commit_history(tmp_dir) self.assertIn("initial commit", commits) # We can't test that epoch 2 and 3 are in the commits without being flaky as those might be skipped if @@ -2106,7 +2106,7 @@ class TrainerIntegrationWithHubTester(unittest.TestCase): time.sleep(0.5) with tempfile.TemporaryDirectory() as tmp_dir: - _ = Repository(tmp_dir, clone_from=f"{USER}/test-trainer-step", use_auth_token=self._token) + _ = Repository(tmp_dir, clone_from=f"{USER}/test-trainer-step", token=self._token) commits = self.get_commit_history(tmp_dir) self.assertIn("initial commit", commits) # We can't test that epoch 2 and 3 are in the commits without being flaky as those might be skipped if diff --git a/utils/update_metadata.py b/utils/update_metadata.py index 6c5f3ee2b8..d6dea03a8b 100644 --- a/utils/update_metadata.py +++ b/utils/update_metadata.py @@ -214,9 +214,7 @@ def update_metadata(token, commit_sha): Update the metadata for the Transformers repo. """ with tempfile.TemporaryDirectory() as tmp_dir: - repo = Repository( - tmp_dir, clone_from="huggingface/transformers-metadata", repo_type="dataset", use_auth_token=token - ) + repo = Repository(tmp_dir, clone_from="huggingface/transformers-metadata", repo_type="dataset", token=token) frameworks_table = get_frameworks_table() frameworks_dataset = Dataset.from_pandas(frameworks_table)