From 8f0f7271d00903d1f0d6c08d11b5ad05b30e5faa Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Tue, 28 May 2024 10:53:28 +0200 Subject: [PATCH 01/36] use `@main` (#31065) use main Co-authored-by: ydshieh --- .github/workflows/ssh-runner.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ssh-runner.yml b/.github/workflows/ssh-runner.yml index ab87b2d5fa..3319be368a 100644 --- a/.github/workflows/ssh-runner.yml +++ b/.github/workflows/ssh-runner.yml @@ -56,7 +56,7 @@ jobs: nvidia-smi - name: Tailscale # In order to be able to SSH when a test fails - uses: huggingface/tailscale-action@v1 + uses: huggingface/tailscale-action@main with: authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }} slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }} From 8e3b1fef9792673c8900c303cb93f65b70fd48e1 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Tue, 28 May 2024 11:36:26 +0200 Subject: [PATCH 02/36] Remove `ninja` from docker image build (#31080) fix Co-authored-by: ydshieh --- docker/transformers-all-latest-gpu/Dockerfile | 4 ++++ .../models/deformable_detr/test_modeling_deformable_detr.py | 6 ------ tests/models/rwkv/test_modeling_rwkv.py | 6 ------ 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile index b888397f95..378a65d1bf 100644 --- a/docker/transformers-all-latest-gpu/Dockerfile +++ b/docker/transformers-all-latest-gpu/Dockerfile @@ -51,6 +51,10 @@ RUN python3 -m pip install --no-cache-dir bitsandbytes # Some tests require quanto RUN python3 -m pip install --no-cache-dir quanto +# `quanto` will install `ninja` which leads to many `CUDA error: an illegal memory access ...` in some model tests +# (`deformable_detr`, `rwkv`, `mra`) +RUN python3 -m pip uninstall -y ninja + # For `dinat` model # The `XXX` part in `torchXXX` needs to match `PYTORCH` (to some extent) RUN python3 -m pip install --no-cache-dir natten==0.15.1+torch220$CUDA -f https://shi-labs.com/natten/wheels diff --git a/tests/models/deformable_detr/test_modeling_deformable_detr.py b/tests/models/deformable_detr/test_modeling_deformable_detr.py index 032bfcb4e1..2ae3e3f088 100644 --- a/tests/models/deformable_detr/test_modeling_deformable_detr.py +++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py @@ -232,12 +232,6 @@ class DeformableDetrModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT self.model_tester = DeformableDetrModelTester(self) self.config_tester = ConfigTester(self, config_class=DeformableDetrConfig, has_text_modality=False) - @unittest.skip( - "This starts to fail since 2024/05/24, but earlier commits also fail now and affect many other tests. The error is `an illegal memory access was encountered`." 
- ) - def test_model_parallelism(self): - super().test_model_parallelism() - def test_config(self): # we don't test common_properties and arguments_init as these don't apply for Deformable DETR self.config_tester.create_and_test_config_to_json_string() diff --git a/tests/models/rwkv/test_modeling_rwkv.py b/tests/models/rwkv/test_modeling_rwkv.py index 48c240d8e3..d2a41a863d 100644 --- a/tests/models/rwkv/test_modeling_rwkv.py +++ b/tests/models/rwkv/test_modeling_rwkv.py @@ -304,12 +304,6 @@ class RwkvModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin standardMsg = "%s not found in %s" % (safe_repr(member), safe_repr(container)) self.fail(self._formatMessage(msg, standardMsg)) - @unittest.skip( - "This starts to fail since 2024/05/24, but earlier commits also fail now and affect many other tests. The error is `an illegal memory access was encountered`." - ) - def test_model_parallelism(self): - super().test_model_parallelism() - def test_config(self): self.config_tester.run_common_tests() From e18da4e3f292b05a81eef6925e19caea22d4e14b Mon Sep 17 00:00:00 2001 From: Clint Adams Date: Tue, 28 May 2024 05:48:23 -0400 Subject: [PATCH 03/36] fix "piano" typo (#31027) --- src/transformers/models/pop2piano/modeling_pop2piano.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/pop2piano/modeling_pop2piano.py b/src/transformers/models/pop2piano/modeling_pop2piano.py index 7b8795e453..c769cff3c4 100644 --- a/src/transformers/models/pop2piano/modeling_pop2piano.py +++ b/src/transformers/models/pop2piano/modeling_pop2piano.py @@ -71,7 +71,7 @@ POP2PIANO_INPUTS_DOCSTRING = r""" so you should be able to pad the inputs on both the right and the left. Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for detail. [What are input IDs?](../glossary#input-ids) To know more on how to prepare `input_ids` for pretraining - take a look a [Pop2Pianp Training](./Pop2Piano#training). + take a look a [Pop2Piano Training](./Pop2Piano#training). attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*): Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: - 1 for tokens that are **not masked**, From dd4654eab7593be34294dc16279f52e4efa8869e Mon Sep 17 00:00:00 2001 From: AP <108011872+apalkk@users.noreply.github.com> Date: Tue, 28 May 2024 09:50:45 +0000 Subject: [PATCH 04/36] Update quicktour.md to fix broken link to Glossary (#31072) Update quicktour.md to fix broken link Missing '/' in attention mask link in the transformers quicktour --- docs/source/en/quicktour.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/quicktour.md b/docs/source/en/quicktour.md index 9f8ae15700..d3770a18f9 100755 --- a/docs/source/en/quicktour.md +++ b/docs/source/en/quicktour.md @@ -204,7 +204,7 @@ Pass your text to the tokenizer: The tokenizer returns a dictionary containing: * [input_ids](./glossary#input-ids): numerical representations of your tokens. -* [attention_mask](.glossary#attention-mask): indicates which tokens should be attended to. +* [attention_mask](./glossary#attention-mask): indicates which tokens should be attended to. 
A tokenizer can also accept a list of inputs, and pad and truncate the text to return a batch with uniform length: From 537deb7869f7711ec67aad9459b9f78ad5df5161 Mon Sep 17 00:00:00 2001 From: Hengwen Tong Date: Tue, 28 May 2024 17:52:47 +0800 Subject: [PATCH 05/36] Remove redundant backend checks in training_args.py (#30999) * Remove backend checks in training_args.py * Expilicit initialize the device --------- Co-authored-by: tonghengwen --- src/transformers/training_args.py | 37 ++++--------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 7c5d4b1c73..a97139a07b 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -67,7 +67,7 @@ if is_torch_available(): import torch import torch.distributed as dist - from .pytorch_utils import is_torch_greater_or_equal_than_2_0, is_torch_greater_or_equal_than_2_3 + from .pytorch_utils import is_torch_greater_or_equal_than_2_0 if is_accelerate_available(): from accelerate.state import AcceleratorState, PartialState @@ -1677,38 +1677,9 @@ class TrainingArguments: ) self.accelerator_config.split_batches = self.split_batches - if ( - self.framework == "pt" - and is_torch_available() - and (self.device.type == "cpu" and not is_torch_greater_or_equal_than_2_3) - and (self.device.type != "cuda") - and (self.device.type != "mlu") - and (self.device.type != "npu") - and (self.device.type != "xpu") - and (get_xla_device_type(self.device) not in ["GPU", "CUDA"]) - and (self.fp16 or self.fp16_full_eval) - ): - raise ValueError( - "FP16 Mixed precision training with AMP or APEX (`--fp16`) and FP16 half precision evaluation" - " (`--fp16_full_eval`) can only be used on CUDA or MLU devices or NPU devices or certain XPU devices (with IPEX)." - ) - - if ( - self.framework == "pt" - and is_torch_available() - and (self.device.type != "cuda") - and (self.device.type != "mlu") - and (self.device.type != "npu") - and (self.device.type != "xpu") - and (get_xla_device_type(self.device) not in ["GPU", "CUDA"]) - and (get_xla_device_type(self.device) != "TPU") - and (self.device.type != "cpu") - and (self.bf16 or self.bf16_full_eval) - ): - raise ValueError( - "BF16 Mixed precision training with AMP (`--bf16`) and BF16 half precision evaluation" - " (`--bf16_full_eval`) can only be used on CUDA, XPU (with IPEX), NPU, MLU or CPU/TPU/NeuronCore devices." 
- ) + # Initialize device before we proceed + if self.framework == "pt" and is_torch_available(): + self.device if self.torchdynamo is not None: warnings.warn( From 936ab7bae5e040ec58994cb722dd587b9ab26581 Mon Sep 17 00:00:00 2001 From: oOraph <13552058+oOraph@users.noreply.github.com> Date: Tue, 28 May 2024 11:56:05 +0200 Subject: [PATCH 06/36] fix from_pretrained in offline mode when model is preloaded in cache (#31010) * Unit test to verify fix Signed-off-by: Raphael Glon * fix from_pretrained in offline mode when model is preloaded in cache Signed-off-by: Raphael Glon * minor: fmt Signed-off-by: Raphael Glon --------- Signed-off-by: Raphael Glon Co-authored-by: Raphael Glon --- src/transformers/modeling_utils.py | 118 ++++++++++++++--------------- tests/test_modeling_utils.py | 96 ++++++++++++++++++++++- 2 files changed, 154 insertions(+), 60 deletions(-) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index e58185dc7c..bd232f9464 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -3392,70 +3392,70 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix ) if resolved_archive_file is not None: is_sharded = True - - if not local_files_only and resolved_archive_file is not None: - if filename in [WEIGHTS_NAME, WEIGHTS_INDEX_NAME]: - # If the PyTorch file was found, check if there is a safetensors file on the repository - # If there is no safetensors file on the repositories, start an auto conversion - safe_weights_name = SAFE_WEIGHTS_INDEX_NAME if is_sharded else SAFE_WEIGHTS_NAME + if not local_files_only and not is_offline_mode(): + if resolved_archive_file is not None: + if filename in [WEIGHTS_NAME, WEIGHTS_INDEX_NAME]: + # If the PyTorch file was found, check if there is a safetensors file on the repository + # If there is no safetensors file on the repositories, start an auto conversion + safe_weights_name = SAFE_WEIGHTS_INDEX_NAME if is_sharded else SAFE_WEIGHTS_NAME + has_file_kwargs = { + "revision": revision, + "proxies": proxies, + "token": token, + } + cached_file_kwargs = { + "cache_dir": cache_dir, + "force_download": force_download, + "resume_download": resume_download, + "local_files_only": local_files_only, + "user_agent": user_agent, + "subfolder": subfolder, + "_raise_exceptions_for_gated_repo": False, + "_raise_exceptions_for_missing_entries": False, + "_commit_hash": commit_hash, + **has_file_kwargs, + } + if not has_file(pretrained_model_name_or_path, safe_weights_name, **has_file_kwargs): + Thread( + target=auto_conversion, + args=(pretrained_model_name_or_path,), + kwargs={"ignore_errors_during_conversion": True, **cached_file_kwargs}, + name="Thread-autoconversion", + ).start() + else: + # Otherwise, no PyTorch file was found, maybe there is a TF or Flax model file. + # We try those to give a helpful error message. 
has_file_kwargs = { "revision": revision, "proxies": proxies, "token": token, } - cached_file_kwargs = { - "cache_dir": cache_dir, - "force_download": force_download, - "resume_download": resume_download, - "local_files_only": local_files_only, - "user_agent": user_agent, - "subfolder": subfolder, - "_raise_exceptions_for_gated_repo": False, - "_raise_exceptions_for_missing_entries": False, - "_commit_hash": commit_hash, - **has_file_kwargs, - } - if not has_file(pretrained_model_name_or_path, safe_weights_name, **has_file_kwargs): - Thread( - target=auto_conversion, - args=(pretrained_model_name_or_path,), - kwargs={"ignore_errors_during_conversion": True, **cached_file_kwargs}, - name="Thread-autoconversion", - ).start() - else: - # Otherwise, no PyTorch file was found, maybe there is a TF or Flax model file. - # We try those to give a helpful error message. - has_file_kwargs = { - "revision": revision, - "proxies": proxies, - "token": token, - } - if has_file(pretrained_model_name_or_path, TF2_WEIGHTS_NAME, **has_file_kwargs): - raise EnvironmentError( - f"{pretrained_model_name_or_path} does not appear to have a file named" - f" {_add_variant(WEIGHTS_NAME, variant)} but there is a file for TensorFlow weights." - " Use `from_tf=True` to load this model from those weights." - ) - elif has_file(pretrained_model_name_or_path, FLAX_WEIGHTS_NAME, **has_file_kwargs): - raise EnvironmentError( - f"{pretrained_model_name_or_path} does not appear to have a file named" - f" {_add_variant(WEIGHTS_NAME, variant)} but there is a file for Flax weights. Use" - " `from_flax=True` to load this model from those weights." - ) - elif variant is not None and has_file( - pretrained_model_name_or_path, WEIGHTS_NAME, **has_file_kwargs - ): - raise EnvironmentError( - f"{pretrained_model_name_or_path} does not appear to have a file named" - f" {_add_variant(WEIGHTS_NAME, variant)} but there is a file without the variant" - f" {variant}. Use `variant=None` to load this model from those weights." - ) - else: - raise EnvironmentError( - f"{pretrained_model_name_or_path} does not appear to have a file named" - f" {_add_variant(WEIGHTS_NAME, variant)}, {_add_variant(SAFE_WEIGHTS_NAME, variant)}," - f" {TF2_WEIGHTS_NAME}, {TF_WEIGHTS_NAME} or {FLAX_WEIGHTS_NAME}." - ) + if has_file(pretrained_model_name_or_path, TF2_WEIGHTS_NAME, **has_file_kwargs): + raise EnvironmentError( + f"{pretrained_model_name_or_path} does not appear to have a file named" + f" {_add_variant(WEIGHTS_NAME, variant)} but there is a file for TensorFlow weights." + " Use `from_tf=True` to load this model from those weights." + ) + elif has_file(pretrained_model_name_or_path, FLAX_WEIGHTS_NAME, **has_file_kwargs): + raise EnvironmentError( + f"{pretrained_model_name_or_path} does not appear to have a file named" + f" {_add_variant(WEIGHTS_NAME, variant)} but there is a file for Flax weights. Use" + " `from_flax=True` to load this model from those weights." + ) + elif variant is not None and has_file( + pretrained_model_name_or_path, WEIGHTS_NAME, **has_file_kwargs + ): + raise EnvironmentError( + f"{pretrained_model_name_or_path} does not appear to have a file named" + f" {_add_variant(WEIGHTS_NAME, variant)} but there is a file without the variant" + f" {variant}. Use `variant=None` to load this model from those weights." 
+ ) + else: + raise EnvironmentError( + f"{pretrained_model_name_or_path} does not appear to have a file named" + f" {_add_variant(WEIGHTS_NAME, variant)}, {_add_variant(SAFE_WEIGHTS_NAME, variant)}," + f" {TF2_WEIGHTS_NAME}, {TF_WEIGHTS_NAME} or {FLAX_WEIGHTS_NAME}." + ) except EnvironmentError: # Raise any environment error raise by `cached_file`. It will have a helpful error message adapted # to the original exception. diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py index 9a00340d14..01620724e7 100755 --- a/tests/test_modeling_utils.py +++ b/tests/test_modeling_utils.py @@ -33,6 +33,7 @@ from requests.exceptions import HTTPError from transformers import ( AutoConfig, AutoModel, + AutoModelForImageClassification, AutoModelForSequenceClassification, OwlViTForObjectDetection, PretrainedConfig, @@ -76,7 +77,6 @@ sys.path.append(str(Path(__file__).parent.parent / "utils")) from test_module.custom_configuration import CustomConfig, NoSuperInitConfig # noqa E402 - if is_torch_available(): import torch from safetensors.torch import save_file as safe_save_file @@ -194,6 +194,97 @@ if is_torch_available(): attention_mask = _prepare_4d_attention_mask(mask, dtype=inputs_embeds.dtype) return attention_mask + class TestOffline(unittest.TestCase): + def test_offline(self): + # Ugly setup with monkeypatches, amending env vars here is too late as libs have already been imported + from huggingface_hub import constants + + from transformers.utils import hub + + offlfine_env = hub._is_offline_mode + hub_cache_env = constants.HF_HUB_CACHE + hub_cache_env1 = constants.HUGGINGFACE_HUB_CACHE + default_cache = constants.default_cache_path + transformers_cache = hub.TRANSFORMERS_CACHE + + try: + hub._is_offline_mode = True + with tempfile.TemporaryDirectory() as tmpdir: + LOG.info("Temporary cache dir %s", tmpdir) + constants.HF_HUB_CACHE = tmpdir + constants.HUGGINGFACE_HUB_CACHE = tmpdir + constants.default_cache_path = tmpdir + hub.TRANSFORMERS_CACHE = tmpdir + # First offline load should fail + try: + AutoModelForImageClassification.from_pretrained( + TINY_IMAGE_CLASSIF, revision="main", use_auth_token=None + ) + except OSError: + LOG.info("Loading model %s in offline mode failed as expected", TINY_IMAGE_CLASSIF) + else: + self.fail("Loading model {} in offline mode should fail".format(TINY_IMAGE_CLASSIF)) + + # Download model -> Huggingface Hub not concerned by our offline mode + LOG.info("Downloading %s for offline tests", TINY_IMAGE_CLASSIF) + hub_api = HfApi() + local_dir = hub_api.snapshot_download(TINY_IMAGE_CLASSIF, cache_dir=tmpdir) + + LOG.info("Model %s downloaded in %s", TINY_IMAGE_CLASSIF, local_dir) + + AutoModelForImageClassification.from_pretrained( + TINY_IMAGE_CLASSIF, revision="main", use_auth_token=None + ) + finally: + # Tear down: reset env as it was before calling this test + hub._is_offline_mode = offlfine_env + constants.HF_HUB_CACHE = hub_cache_env + constants.HUGGINGFACE_HUB_CACHE = hub_cache_env1 + constants.default_cache_path = default_cache + hub.TRANSFORMERS_CACHE = transformers_cache + + def test_local_files_only(self): + # Ugly setup with monkeypatches, amending env vars here is too late as libs have already been imported + from huggingface_hub import constants + + from transformers.utils import hub + + hub_cache_env = constants.HF_HUB_CACHE + hub_cache_env1 = constants.HUGGINGFACE_HUB_CACHE + default_cache = constants.default_cache_path + transformers_cache = hub.TRANSFORMERS_CACHE + try: + with tempfile.TemporaryDirectory() as tmpdir: + 
LOG.info("Temporary cache dir %s", tmpdir) + constants.HF_HUB_CACHE = tmpdir + constants.HUGGINGFACE_HUB_CACHE = tmpdir + constants.default_cache_path = tmpdir + hub.TRANSFORMERS_CACHE = tmpdir + try: + AutoModelForImageClassification.from_pretrained( + TINY_IMAGE_CLASSIF, revision="main", use_auth_token=None, local_files_only=True + ) + except OSError: + LOG.info("Loading model %s in offline mode failed as expected", TINY_IMAGE_CLASSIF) + else: + self.fail("Loading model {} in offline mode should fail".format(TINY_IMAGE_CLASSIF)) + + LOG.info("Downloading %s for offline tests", TINY_IMAGE_CLASSIF) + hub_api = HfApi() + local_dir = hub_api.snapshot_download(TINY_IMAGE_CLASSIF, cache_dir=tmpdir) + + LOG.info("Model %s downloaded in %s", TINY_IMAGE_CLASSIF, local_dir) + + AutoModelForImageClassification.from_pretrained( + TINY_IMAGE_CLASSIF, revision="main", use_auth_token=None, local_files_only=True + ) + finally: + # Tear down: reset env as it was before calling this test + constants.HF_HUB_CACHE = hub_cache_env + constants.HUGGINGFACE_HUB_CACHE = hub_cache_env1 + constants.default_cache_path = default_cache + hub.TRANSFORMERS_CACHE = transformers_cache + if is_flax_available(): from transformers import FlaxBertModel @@ -205,6 +296,9 @@ if is_tf_available(): TINY_T5 = "patrickvonplaten/t5-tiny-random" TINY_BERT_FOR_TOKEN_CLASSIFICATION = "hf-internal-testing/tiny-bert-for-token-classification" TINY_MISTRAL = "hf-internal-testing/tiny-random-MistralForCausalLM" +TINY_IMAGE_CLASSIF = "hf-internal-testing/tiny-random-SiglipForImageClassification" + +LOG = logging.get_logger(__name__) def check_models_equal(model1, model2): From c31473ed4492fdf26aec4173451f31590021862f Mon Sep 17 00:00:00 2001 From: Pavel Iakubovskii Date: Tue, 28 May 2024 10:41:40 +0000 Subject: [PATCH 07/36] Remove float64 cast for OwlVit and OwlV2 to support MPS device (#31071) Remove float64 --- src/transformers/models/owlv2/modeling_owlv2.py | 1 - src/transformers/models/owlvit/modeling_owlvit.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/transformers/models/owlv2/modeling_owlv2.py b/src/transformers/models/owlv2/modeling_owlv2.py index a7924085fc..05c5cd4595 100644 --- a/src/transformers/models/owlv2/modeling_owlv2.py +++ b/src/transformers/models/owlv2/modeling_owlv2.py @@ -1276,7 +1276,6 @@ class Owlv2ClassPredictionHead(nn.Module): if query_mask.ndim > 1: query_mask = torch.unsqueeze(query_mask, dim=-2) - pred_logits = pred_logits.to(torch.float64) pred_logits = torch.where(query_mask == 0, -1e6, pred_logits) pred_logits = pred_logits.to(torch.float32) diff --git a/src/transformers/models/owlvit/modeling_owlvit.py b/src/transformers/models/owlvit/modeling_owlvit.py index a7d8445523..ee6d8aa423 100644 --- a/src/transformers/models/owlvit/modeling_owlvit.py +++ b/src/transformers/models/owlvit/modeling_owlvit.py @@ -1257,7 +1257,6 @@ class OwlViTClassPredictionHead(nn.Module): if query_mask.ndim > 1: query_mask = torch.unsqueeze(query_mask, dim=-2) - pred_logits = pred_logits.to(torch.float64) pred_logits = torch.where(query_mask == 0, -1e6, pred_logits) pred_logits = pred_logits.to(torch.float32) From 98e2d48e9af3631e8f8f2070198912fc5d6bc19e Mon Sep 17 00:00:00 2001 From: Pavel Iakubovskii Date: Tue, 28 May 2024 11:06:06 +0000 Subject: [PATCH 08/36] Fix OWLv2 post_process_object_detection for multiple images (#31082) * Add test for multiple images * [run slow] owlv2 * Fix box rescaling * [run slow] owlv2 --- .../models/owlv2/image_processing_owlv2.py | 16 ++------ .../owlv2/test_image_processor_owlv2.py | 
37 ++++++++++++++++--- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/src/transformers/models/owlv2/image_processing_owlv2.py b/src/transformers/models/owlv2/image_processing_owlv2.py index 2ba3772d00..1e9a5163a1 100644 --- a/src/transformers/models/owlv2/image_processing_owlv2.py +++ b/src/transformers/models/owlv2/image_processing_owlv2.py @@ -524,19 +524,11 @@ class Owlv2ImageProcessor(BaseImageProcessor): else: img_h, img_w = target_sizes.unbind(1) - # rescale coordinates - width_ratio = 1 - height_ratio = 1 + # Rescale coordinates, image is padded to square for inference, + # that is why we need to scale boxes to the max size + size = torch.max(img_h, img_w) + scale_fct = torch.stack([size, size, size, size], dim=1).to(boxes.device) - if img_w < img_h: - width_ratio = img_w / img_h - elif img_h < img_w: - height_ratio = img_h / img_w - - img_w = img_w / width_ratio - img_h = img_h / height_ratio - - scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1).to(boxes.device) boxes = boxes * scale_fct[:, None, :] results = [] diff --git a/tests/models/owlv2/test_image_processor_owlv2.py b/tests/models/owlv2/test_image_processor_owlv2.py index 16b6b24df3..87b96d0654 100644 --- a/tests/models/owlv2/test_image_processor_owlv2.py +++ b/tests/models/owlv2/test_image_processor_owlv2.py @@ -130,17 +130,42 @@ class Owlv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): model = Owlv2ForObjectDetection.from_pretrained(checkpoint) image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") - inputs = processor(text=["cat"], images=image, return_tensors="pt") + text = ["cat"] + target_size = image.size[::-1] + expected_boxes = torch.tensor( + [ + [341.66656494140625, 23.38756561279297, 642.321044921875, 371.3482971191406], + [6.753320693969727, 51.96149826049805, 326.61810302734375, 473.12982177734375], + ] + ) + # single image + inputs = processor(text=[text], images=[image], return_tensors="pt") with torch.no_grad(): outputs = model(**inputs) - target_sizes = torch.tensor([image.size[::-1]]) - results = processor.post_process_object_detection(outputs, threshold=0.2, target_sizes=target_sizes)[0] + results = processor.post_process_object_detection(outputs, threshold=0.2, target_sizes=[target_size])[0] - boxes = results["boxes"].tolist() - self.assertEqual(boxes[0], [341.66656494140625, 23.38756561279297, 642.321044921875, 371.3482971191406]) - self.assertEqual(boxes[1], [6.753320693969727, 51.96149826049805, 326.61810302734375, 473.12982177734375]) + boxes = results["boxes"] + self.assertTrue( + torch.allclose(boxes, expected_boxes, atol=1e-2), + f"Single image bounding boxes fail. Expected {expected_boxes}, got {boxes}", + ) + + # batch of images + inputs = processor(text=[text, text], images=[image, image], return_tensors="pt") + with torch.no_grad(): + outputs = model(**inputs) + results = processor.post_process_object_detection( + outputs, threshold=0.2, target_sizes=[target_size, target_size] + ) + + for result in results: + boxes = result["boxes"] + self.assertTrue( + torch.allclose(boxes, expected_boxes, atol=1e-2), + f"Batch image bounding boxes fail. 
Expected {expected_boxes}, got {boxes}", + ) @unittest.skip("OWLv2 doesn't treat 4 channel PIL and numpy consistently yet") # FIXME Amy def test_call_numpy_4_channels(self): From 66add161dcaa91f4e60c8e3224ed297ab72e7b0f Mon Sep 17 00:00:00 2001 From: Sina Taslimi <33656391+taslimisina@users.noreply.github.com> Date: Tue, 28 May 2024 13:09:32 +0200 Subject: [PATCH 09/36] Fix typo in trainer.py (#31048) --- src/transformers/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 20c709b248..58e5fd14b6 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -3565,7 +3565,7 @@ class Trainer: When used with `load_best_model_at_end`, make sure `metric_for_best_model` references exactly one of the datasets. If you, for example, pass in `{"data1": data1, "data2": data2}` for two datasets `data1` and `data2`, you could specify `metric_for_best_model="eval_data1_loss"` for using the - loss on `data1` and `metric_for_best_model="eval_data1_loss"` for the loss on `data2`. + loss on `data1` and `metric_for_best_model="eval_data2_loss"` for the loss on `data2`. From 90da0b1c9f0892d43ef5e963cbf9bf4b93f1a911 Mon Sep 17 00:00:00 2001 From: NielsRogge <48327001+NielsRogge@users.noreply.github.com> Date: Tue, 28 May 2024 13:22:06 +0200 Subject: [PATCH 10/36] [SuperPoint, PaliGemma] Update docs (#31025) * Update docs * Add PaliGemma resources * Address comment * Update docs --- docs/source/en/model_doc/paligemma.md | 42 +++++++++++++++++++++++++- docs/source/en/model_doc/superpoint.md | 21 ++++++++++--- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/docs/source/en/model_doc/paligemma.md b/docs/source/en/model_doc/paligemma.md index 10946caa83..48debe593f 100644 --- a/docs/source/en/model_doc/paligemma.md +++ b/docs/source/en/model_doc/paligemma.md @@ -18,11 +18,51 @@ rendered properly in your Markdown viewer. ## Overview -The PaliGemma model was proposed by Google. It is a 3B VLM composed by a Siglip-400m vision encoder and a Gemma-2B decoder linked by a multimodal linear projection. It is not a chat model with images. It cuts an image into a fixed number of VIT tokens and prepends it to an optional prompt. One particularity is that the model uses full block attention on all the image tokens plus the input text tokens. It comes in 3 resolutions, 224x224, 448x448 and 896x896 with 3 base models, with 55 fine-tuned versions for different tasks, and 2 mix models. +The PaliGemma model was proposed in [PaliGemma – Google's Cutting-Edge Open Vision Language Model](https://huggingface.co/blog/paligemma) by Google. It is a 3B vision-language model composed by a [SigLIP](siglip) vision encoder and a [Gemma](gemma) language decoder linked by a multimodal linear projection. It cuts an image into a fixed number of VIT tokens and prepends it to an optional prompt. One particularity is that the model uses full block attention on all the image tokens plus the input text tokens. It comes in 3 resolutions, 224x224, 448x448 and 896x896 with 3 base models, with 55 fine-tuned versions for different tasks, and 2 mix models. + + + PaliGemma architecture. Taken from the blog post. This model was contributed by [Molbap](https://huggingface.co/Molbap). 
+## Usage tips + +Inference with PaliGemma can be performed as follows: + +```python +from transformers import AutoProcessor, PaliGemmaForConditionalGeneration + +model_id = "google/paligemma-3b-mix-224" +model = PaliGemmaForConditionalGeneration.from_pretrained(model_id) +processor = AutoProcessor.from_pretrained(model_id) + +prompt = "What is on the flower?" +image_file = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg?download=true" +raw_image = Image.open(requests.get(image_file, stream=True).raw) +inputs = processor(prompt, raw_image, return_tensors="pt") +output = model.generate(**inputs, max_new_tokens=20) + +print(processor.decode(output[0], skip_special_tokens=True)[len(prompt):]) +``` + +- PaliGemma is not meant for conversational use, and it works best when fine-tuning to a specific use case. Some downstream tasks on which PaliGemma can be fine-tuned include image captioning, visual question answering (VQA), object detection, referring expression segmentation and document understanding. +- One can use `PaliGemmaProcessor` to prepare images, text and optional labels for the model. When fine-tuning a PaliGemma model, the `suffix` argument can be passed to the processor which creates the `labels` for the model: + +```python +prompt = "What is on the flower?" +answer = "a bee" +inputs = processor(text=prompt, images=raw_image, suffix=answer, return_tensors="pt") +``` + +## Resources + +A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with PaliGemma. If you're interested in submitting a resource to be included here, please feel free to open a Pull Request and we'll review it! The resource should ideally demonstrate something new instead of duplicating an existing resource. + +- A blog post introducing all the features of PaliGemma can be found [here](https://huggingface.co/blog/paligemma). +- Demo notebooks on how to fine-tune PaliGemma for VQA with the Trainer API along with inference can be found [here](https://github.com/huggingface/notebooks/tree/main/examples/paligemma). +- Demo notebooks on how to fine-tune PaliGemma on a custom dataset (receipt image -> JSON) along with inference can be found [here](https://github.com/NielsRogge/Transformers-Tutorials/tree/master/PaliGemma). 🌎 ## PaliGemmaConfig diff --git a/docs/source/en/model_doc/superpoint.md b/docs/source/en/model_doc/superpoint.md index 56e28622bd..b9aab2f1b9 100644 --- a/docs/source/en/model_doc/superpoint.md +++ b/docs/source/en/model_doc/superpoint.md @@ -38,12 +38,17 @@ to repeatedly detect a much richer set of interest points than the initial pre-a traditional corner detector. The final system gives rise to state-of-the-art homography estimation results on HPatches when compared to LIFT, SIFT and ORB.* -## How to use + + + SuperPoint overview. Taken from the original paper. 
+ +## Usage tips Here is a quick example of using the model to detect interest points in an image: ```python -from transformers import AutoImageProcessor, AutoModel +from transformers import AutoImageProcessor, SuperPointForKeypointDetection import torch from PIL import Image import requests @@ -52,7 +57,7 @@ url = "http://images.cocodataset.org/val2017/000000039769.jpg" image = Image.open(requests.get(url, stream=True).raw) processor = AutoImageProcessor.from_pretrained("magic-leap-community/superpoint") -model = AutoModel.from_pretrained("magic-leap-community/superpoint") +model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/superpoint") inputs = processor(image, return_tensors="pt") outputs = model(**inputs) @@ -64,7 +69,7 @@ You can also feed multiple images to the model. Due to the nature of SuperPoint, you will need to use the mask attribute to retrieve the respective information : ```python -from transformers import AutoImageProcessor, AutoModel +from transformers import AutoImageProcessor, SuperPointForKeypointDetection import torch from PIL import Image import requests @@ -77,7 +82,7 @@ image_2 = Image.open(requests.get(url_image_2, stream=True).raw) images = [image_1, image_2] processor = AutoImageProcessor.from_pretrained("magic-leap-community/superpoint") -model = AutoModel.from_pretrained("magic-leap-community/superpoint") +model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/superpoint") inputs = processor(images, return_tensors="pt") outputs = model(**inputs) @@ -103,6 +108,12 @@ cv2.imwrite("output_image.png", image) This model was contributed by [stevenbucaille](https://huggingface.co/stevenbucaille). The original code can be found [here](https://github.com/magicleap/SuperPointPretrainedNetwork). +## Resources + +A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with SuperPoint. If you're interested in submitting a resource to be included here, please feel free to open a Pull Request and we'll review it! The resource should ideally demonstrate something new instead of duplicating an existing resource. + +- A notebook showcasing inference and visualization with SuperPoint can be found [here](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/SuperPoint/Inference_with_SuperPoint_to_detect_interest_points_in_an_image.ipynb). 
🌎 + ## SuperPointConfig [[autodoc]] SuperPointConfig From a3c7b59e31710f76492146db1d87ba77e3ae265d Mon Sep 17 00:00:00 2001 From: Lysandre Debut Date: Tue, 28 May 2024 13:34:23 +0200 Subject: [PATCH 11/36] Fix failing tokenizer tests (#31083) * Fix failing tokenizer tests * Use small tokenizer * Fix remaining reference --- .../models/cohere/test_tokenization_cohere.py | 44 +++++++------------ 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/tests/models/cohere/test_tokenization_cohere.py b/tests/models/cohere/test_tokenization_cohere.py index 62e679e34f..56f93a0a96 100644 --- a/tests/models/cohere/test_tokenization_cohere.py +++ b/tests/models/cohere/test_tokenization_cohere.py @@ -29,7 +29,7 @@ class CohereTokenizationTest(TokenizerTesterMixin, unittest.TestCase): test_rust_tokenizer = True test_slow_tokenizer = False from_pretrained_vocab_key = "tokenizer_file" - from_pretrained_id = "CohereForAI/c4ai-command-r-v01" + from_pretrained_id = "hf-internal-testing/tiny-random-CohereForCausalLM" special_tokens_map = { "bos_token": "", "eos_token": "<|END_OF_TURN_TOKEN|>", @@ -39,7 +39,7 @@ class CohereTokenizationTest(TokenizerTesterMixin, unittest.TestCase): def setUp(self): super().setUp() - tokenizer = CohereTokenizerFast.from_pretrained("CohereForAI/c4ai-command-r-v01") + tokenizer = CohereTokenizerFast.from_pretrained("hf-internal-testing/tiny-random-CohereForCausalLM") tokenizer.save_pretrained(self.tmpdirname) def get_rust_tokenizer(self, **kwargs): @@ -57,7 +57,10 @@ class CohereTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer = self.get_rust_tokenizer() INPUT_SENTENCES = ["The quick brown fox<|END_OF_TURN_TOKEN|>", "jumps over the lazy dog<|END_OF_TURN_TOKEN|>"] - TARGET_TOKENS = [[5, 2162, 6629, 19883, 73388, 255001], [5, 81, 25092, 2515, 1690, 46189, 9507, 255001]] + TARGET_TOKENS = [ + [5, 60, 203, 746, 666, 980, 571, 222, 87, 96, 8], + [5, 82, 332, 88, 91, 544, 206, 257, 930, 97, 239, 435, 8], + ] computed_tokens = tokenizer.batch_encode_plus(INPUT_SENTENCES)["input_ids"] self.assertListEqual(TARGET_TOKENS, computed_tokens) @@ -141,34 +144,17 @@ class CohereTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ], ] tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats] + # fmt: off expected_tokens = [ - [5, 255000, 255008, 5659, 1955, 1671, 19264, 171597, 21, 255001, 255000, 255006, 28339, 8, 255001], - [ - 5, - 255000, - 255008, - 5659, - 1955, - 1671, - 19264, - 171597, - 21, - 255001, - 255000, - 255006, - 28339, - 8, - 255001, - 255000, - 255007, - 97190, - 1726, - 5694, - 1933, - 21, - 255001, - ], + [5, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 59, 65, 59, 60, 45, 53, 71, 60, 55, 51, 45, 54, 99, 38, 65, 243, 394, 204, 336, 84, 88, 887, 374, 216, 74, 286, 22, 8, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 61, 59, 45, 58, 71, 60, 55, 51, 45, 54, 99, 38, 48, 420, 87, 9, 8], + [5, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 59, 65, + 59, 60, 45, 53, 71, 60, 55, 51, 45, 54, 99, 38, 65, 243, 394, 204, 336, 84, 88, 887, 374, 216, 74, 286, 22, 8, + 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 61, 59, + 45, 58, 71, 60, 55, 51, 45, 54, 99, 38, 48, 420, 87, 9, 8, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, + 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 43, 48, 41, 60, 42, 55, 60, 
71, 60, 55, 51, 45, 54, 99, 38, + 54, 567, 235, 693, 276, 411, 243, 22, 8] ] + # fmt: on for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): self.assertListEqual(tokenized_chat, expected_tokens) From 779bc360ff4f3965a1ac29fdc02c43db7ede08c0 Mon Sep 17 00:00:00 2001 From: Raushan Turganbay Date: Tue, 28 May 2024 17:07:42 +0500 Subject: [PATCH 12/36] Watermark: fix tests (#30961) * fix tests * style * Update tests/generation/test_utils.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --- tests/generation/test_utils.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 7d654312a3..57b6c6d188 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -2148,6 +2148,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi watermark_config = WatermarkingConfig(bias=2.5, seeding_scheme="selfhash") _ = model.generate(**model_inputs, watermarking_config=watermark_config, do_sample=False, max_length=15) + # We will not check watermarked text, since we check it in `logits_processors` tests + # Checking if generated ids are as expected fails on different hardware args = { "bias": 2.0, "context_width": 1, @@ -2158,19 +2160,11 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi output = model.generate(**model_inputs, do_sample=False, max_length=15) output_selfhash = model.generate(**model_inputs, watermarking_config=args, do_sample=False, max_length=15) - # check that the watermarked text is generating what is should - self.assertListEqual( - output.tolist(), [[40, 481, 307, 262, 717, 284, 9159, 326, 314, 716, 407, 257, 4336, 286, 262]] - ) - self.assertListEqual( - output_selfhash.tolist(), [[40, 481, 307, 2263, 616, 640, 284, 651, 616, 1621, 503, 612, 553, 531, 367]] - ) - + # Check that the detector is detecting watermarked text detector = WatermarkDetector(model_config=model.config, device=torch_device, watermarking_config=args) detection_out_watermarked = detector(output_selfhash[:, input_len:], return_dict=True) detection_out = detector(output[:, input_len:], return_dict=True) - # check that the detector is detecting watermarked text self.assertListEqual(detection_out_watermarked.prediction.tolist(), [True]) self.assertListEqual(detection_out.prediction.tolist(), [False]) From 4f98b14465562e4a8f855f9488ba79a4350d2909 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Tue, 28 May 2024 15:04:43 +0200 Subject: [PATCH 13/36] Docs / PEFT: Add PEFT API documentation (#31078) * add peft references * add peft references * Update docs/source/en/peft.md * Update docs/source/en/peft.md --- docs/source/en/peft.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/source/en/peft.md b/docs/source/en/peft.md index d86a36e624..9e2ac805b2 100644 --- a/docs/source/en/peft.md +++ b/docs/source/en/peft.md @@ -81,6 +81,8 @@ model = AutoModelForCausalLM.from_pretrained(model_id) model.load_adapter(peft_model_id) ``` +Check out the [API documentation](#transformers.integrations.PeftAdapterMixin) section below for more details. 
+ ## Load in 8bit or 4bit The `bitsandbytes` integration supports 8bit and 4bit precision data types, which are useful for loading large models because it saves memory (see the `bitsandbytes` integration [guide](./quantization#bitsandbytes-integration) to learn more). Add the `load_in_8bit` or `load_in_4bit` parameters to [`~PreTrainedModel.from_pretrained`] and set `device_map="auto"` to effectively distribute the model to your hardware: @@ -227,6 +229,19 @@ lora_config = LoraConfig( model.add_adapter(lora_config) ``` +## API docs + +[[autodoc]] integrations.PeftAdapterMixin + - load_adapter + - add_adapter + - set_adapter + - disable_adapters + - enable_adapters + - active_adapters + - get_adapter_state_dict + + + # Jukebox + + +This model is in maintenance mode only, we don't accept any new PRs changing its code. +If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2. +You can do so by running the following command: `pip install -U transformers==4.40.2`. + + + ## Overview The Jukebox model was proposed in [Jukebox: A generative model for music](https://arxiv.org/pdf/2005.00341.pdf) @@ -27,7 +35,7 @@ The abstract from the paper is the following: *We introduce Jukebox, a model that generates music with singing in the raw audio domain. We tackle the long context of raw audio using a multiscale VQ-VAE to compress it to discrete codes, and modeling those using autoregressive Transformers. We show that the combined model at scale can generate high-fidelity and diverse songs with coherence up to multiple minutes. We can condition on artist and genre to steer the musical and vocal style, and on unaligned lyrics to make the singing more controllable. We are releasing thousands of non cherry-picked samples, along with model weights and code.* As shown on the following figure, Jukebox is made of 3 `priors` which are decoder only models. They follow the architecture described in [Generating Long Sequences with Sparse Transformers](https://arxiv.org/abs/1904.10509), modified to support longer context length. -First, a autoencoder is used to encode the text lyrics. Next, the first (also called `top_prior`) prior attends to the last hidden states extracted from the lyrics encoder. The priors are linked to the previous priors respectively via an `AudioConditioner` module. The`AudioConditioner` upsamples the outputs of the previous prior to raw tokens at a certain audio frame per second resolution. +First, a autoencoder is used to encode the text lyrics. Next, the first (also called `top_prior`) prior attends to the last hidden states extracted from the lyrics encoder. The priors are linked to the previous priors respectively via an `AudioConditioner` module. The`AudioConditioner` upsamples the outputs of the previous prior to raw tokens at a certain audio frame per second resolution. The metadata such as *artist, genre and timing* are passed to each prior, in the form of a start token and positional embedding for the timing data. The hidden states are mapped to the closest codebook vector from the VQVAE in order to convert them to raw audio. 
![JukeboxModel](https://gist.githubusercontent.com/ArthurZucker/92c1acaae62ebf1b6a951710bdd8b6af/raw/c9c517bf4eff61393f6c7dec9366ef02bdd059a3/jukebox.svg) diff --git a/docs/source/en/model_doc/mega.md b/docs/source/en/model_doc/mega.md index 4ce62ca45a..5545f5e19c 100644 --- a/docs/source/en/model_doc/mega.md +++ b/docs/source/en/model_doc/mega.md @@ -16,12 +16,20 @@ rendered properly in your Markdown viewer. # MEGA + + +This model is in maintenance mode only, we don't accept any new PRs changing its code. +If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2. +You can do so by running the following command: `pip install -U transformers==4.40.2`. + + + ## Overview The MEGA model was proposed in [Mega: Moving Average Equipped Gated Attention](https://arxiv.org/abs/2209.10655) by Xuezhe Ma, Chunting Zhou, Xiang Kong, Junxian He, Liangke Gui, Graham Neubig, Jonathan May, and Luke Zettlemoyer. -MEGA proposes a new approach to self-attention with each encoder layer having a multi-headed exponential moving average in addition to a single head of standard dot-product attention, giving the attention mechanism -stronger positional biases. This allows MEGA to perform competitively to Transformers on standard benchmarks including LRA -while also having significantly fewer parameters. MEGA's compute efficiency allows it to scale to very long sequences, making it an +MEGA proposes a new approach to self-attention with each encoder layer having a multi-headed exponential moving average in addition to a single head of standard dot-product attention, giving the attention mechanism +stronger positional biases. This allows MEGA to perform competitively to Transformers on standard benchmarks including LRA +while also having significantly fewer parameters. MEGA's compute efficiency allows it to scale to very long sequences, making it an attractive option for long-document NLP tasks. The abstract from the paper is the following: @@ -34,8 +42,8 @@ The original code can be found [here](https://github.com/facebookresearch/mega). ## Usage tips -- MEGA can perform quite well with relatively few parameters. See Appendix D in the MEGA paper for examples of architectural specs which perform well in various settings. If using MEGA as a decoder, be sure to set `bidirectional=False` to avoid errors with default bidirectional. -- Mega-chunk is a variant of mega that reduces time and spaces complexity from quadratic to linear. Utilize chunking with MegaConfig.use_chunking and control chunk size with MegaConfig.chunk_size +- MEGA can perform quite well with relatively few parameters. See Appendix D in the MEGA paper for examples of architectural specs which perform well in various settings. If using MEGA as a decoder, be sure to set `bidirectional=False` to avoid errors with default bidirectional. +- Mega-chunk is a variant of mega that reduces time and spaces complexity from quadratic to linear. Utilize chunking with MegaConfig.use_chunking and control chunk size with MegaConfig.chunk_size ## Implementation Notes diff --git a/docs/source/en/model_doc/nat.md b/docs/source/en/model_doc/nat.md index ecb61ccb0a..02c2e466cc 100644 --- a/docs/source/en/model_doc/nat.md +++ b/docs/source/en/model_doc/nat.md @@ -16,6 +16,14 @@ rendered properly in your Markdown viewer. # Neighborhood Attention Transformer + + +This model is in maintenance mode only, we don't accept any new PRs changing its code. 
+If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2. +You can do so by running the following command: `pip install -U transformers==4.40.2`. + + + ## Overview NAT was proposed in [Neighborhood Attention Transformer](https://arxiv.org/abs/2204.07143) diff --git a/docs/source/en/model_doc/nezha.md b/docs/source/en/model_doc/nezha.md index 872f576f12..976722592c 100644 --- a/docs/source/en/model_doc/nezha.md +++ b/docs/source/en/model_doc/nezha.md @@ -16,6 +16,14 @@ rendered properly in your Markdown viewer. # Nezha + + +This model is in maintenance mode only, we don't accept any new PRs changing its code. +If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2. +You can do so by running the following command: `pip install -U transformers==4.40.2`. + + + ## Overview The Nezha model was proposed in [NEZHA: Neural Contextualized Representation for Chinese Language Understanding](https://arxiv.org/abs/1909.00204) by Junqiu Wei et al. @@ -25,8 +33,8 @@ The abstract from the paper is the following: *The pre-trained language models have achieved great successes in various natural language understanding (NLU) tasks due to its capacity to capture the deep contextualized information in text by pre-training on large-scale corpora. In this technical report, we present our practice of pre-training language models named NEZHA (NEural contextualiZed -representation for CHinese lAnguage understanding) on Chinese corpora and finetuning for the Chinese NLU tasks. -The current version of NEZHA is based on BERT with a collection of proven improvements, which include Functional +representation for CHinese lAnguage understanding) on Chinese corpora and finetuning for the Chinese NLU tasks. +The current version of NEZHA is based on BERT with a collection of proven improvements, which include Functional Relative Positional Encoding as an effective positional encoding scheme, Whole Word Masking strategy, Mixed Precision Training and the LAMB Optimizer in training the models. The experimental results show that NEZHA achieves the state-of-the-art performances when finetuned on several representative Chinese tasks, including @@ -85,4 +93,4 @@ This model was contributed by [sijunhe](https://huggingface.co/sijunhe). The ori ## NezhaForQuestionAnswering [[autodoc]] NezhaForQuestionAnswering - - forward \ No newline at end of file + - forward diff --git a/docs/source/en/model_doc/qdqbert.md b/docs/source/en/model_doc/qdqbert.md index 19b829d0bc..ca718f34af 100644 --- a/docs/source/en/model_doc/qdqbert.md +++ b/docs/source/en/model_doc/qdqbert.md @@ -16,6 +16,14 @@ rendered properly in your Markdown viewer. # QDQBERT + + +This model is in maintenance mode only, we don't accept any new PRs changing its code. +If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2. +You can do so by running the following command: `pip install -U transformers==4.40.2`. + + + ## Overview The QDQBERT model can be referenced in [Integer Quantization for Deep Learning Inference: Principles and Empirical diff --git a/docs/source/en/model_doc/realm.md b/docs/source/en/model_doc/realm.md index a8227bc83c..558e83c08b 100644 --- a/docs/source/en/model_doc/realm.md +++ b/docs/source/en/model_doc/realm.md @@ -16,6 +16,14 @@ rendered properly in your Markdown viewer. # REALM + + +This model is in maintenance mode only, we don't accept any new PRs changing its code. 
+If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2. +You can do so by running the following command: `pip install -U transformers==4.40.2`. + + + ## Overview The REALM model was proposed in [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang. It's a @@ -86,4 +94,4 @@ This model was contributed by [qqaatw](https://huggingface.co/qqaatw). The origi [[autodoc]] RealmForOpenQA - block_embedding_to - - forward \ No newline at end of file + - forward diff --git a/docs/source/en/model_doc/speech_to_text_2.md b/docs/source/en/model_doc/speech_to_text_2.md index 6648e67f62..fc2d0357c5 100644 --- a/docs/source/en/model_doc/speech_to_text_2.md +++ b/docs/source/en/model_doc/speech_to_text_2.md @@ -16,6 +16,14 @@ rendered properly in your Markdown viewer. # Speech2Text2 + + + This model is in maintenance mode only, we don't accept any new PRs changing its code. + If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2. + You can do so by running the following command: `pip install -U transformers==4.40.2`. + + + ## Overview The Speech2Text2 model is used together with [Wav2Vec2](wav2vec2) for Speech Translation models proposed in diff --git a/docs/source/en/model_doc/tvlt.md b/docs/source/en/model_doc/tvlt.md index f09ea8af86..0a0f50e473 100644 --- a/docs/source/en/model_doc/tvlt.md +++ b/docs/source/en/model_doc/tvlt.md @@ -16,6 +16,14 @@ rendered properly in your Markdown viewer. # TVLT + + +This model is in maintenance mode only, we don't accept any new PRs changing its code. +If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2. +You can do so by running the following command: `pip install -U transformers==4.40.2`. + + + ## Overview The TVLT model was proposed in [TVLT: Textless Vision-Language Transformer](https://arxiv.org/abs/2209.14156) @@ -60,7 +68,7 @@ The original code can be found [here](https://github.com/zinengtang/TVLT). This [[autodoc]] TvltFeatureExtractor - __call__ - + ## TvltModel [[autodoc]] TvltModel diff --git a/docs/source/en/model_doc/vit_hybrid.md b/docs/source/en/model_doc/vit_hybrid.md index ec98fc5e1e..5cde5e5298 100644 --- a/docs/source/en/model_doc/vit_hybrid.md +++ b/docs/source/en/model_doc/vit_hybrid.md @@ -16,6 +16,14 @@ rendered properly in your Markdown viewer. # Hybrid Vision Transformer (ViT Hybrid) + + +This model is in maintenance mode only, we don't accept any new PRs changing its code. +If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2. +You can do so by running the following command: `pip install -U transformers==4.40.2`. + + + ## Overview The hybrid Vision Transformer (ViT) model was proposed in [An Image is Worth 16x16 Words: Transformers for Image Recognition diff --git a/docs/source/en/model_doc/xclip.md b/docs/source/en/model_doc/xclip.md index 45c4c3db74..8c22747387 100644 --- a/docs/source/en/model_doc/xclip.md +++ b/docs/source/en/model_doc/xclip.md @@ -30,7 +30,7 @@ Tips: - Usage of X-CLIP is identical to [CLIP](clip). +alt="drawing" width="600"/> X-CLIP architecture. Taken from the original paper. 
diff --git a/docs/source/en/model_doc/xlm-prophetnet.md b/docs/source/en/model_doc/xlm-prophetnet.md index 7a61aeb3e3..b350cb554b 100644 --- a/docs/source/en/model_doc/xlm-prophetnet.md +++ b/docs/source/en/model_doc/xlm-prophetnet.md @@ -16,6 +16,14 @@ rendered properly in your Markdown viewer. # XLM-ProphetNet + + +This model is in maintenance mode only, we don't accept any new PRs changing its code. +If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2. +You can do so by running the following command: `pip install -U transformers==4.40.2`. + + +
Models diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index fc8f6b1a9c..40b7905bfd 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -321,17 +321,44 @@ _import_structure = { "models.deit": ["DeiTConfig"], "models.deprecated": [], "models.deprecated.bort": [], + "models.deprecated.deta": ["DetaConfig"], + "models.deprecated.efficientformer": ["EfficientFormerConfig"], + "models.deprecated.ernie_m": ["ErnieMConfig"], + "models.deprecated.gptsan_japanese": [ + "GPTSanJapaneseConfig", + "GPTSanJapaneseTokenizer", + ], + "models.deprecated.graphormer": ["GraphormerConfig"], + "models.deprecated.jukebox": [ + "JukeboxConfig", + "JukeboxPriorConfig", + "JukeboxTokenizer", + "JukeboxVQVAEConfig", + ], "models.deprecated.mctct": [ "MCTCTConfig", "MCTCTFeatureExtractor", "MCTCTProcessor", ], + "models.deprecated.mega": ["MegaConfig"], "models.deprecated.mmbt": ["MMBTConfig"], + "models.deprecated.nat": ["NatConfig"], + "models.deprecated.nezha": ["NezhaConfig"], "models.deprecated.open_llama": ["OpenLlamaConfig"], + "models.deprecated.qdqbert": ["QDQBertConfig"], + "models.deprecated.realm": [ + "RealmConfig", + "RealmTokenizer", + ], "models.deprecated.retribert": [ "RetriBertConfig", "RetriBertTokenizer", ], + "models.deprecated.speech_to_text_2": [ + "Speech2Text2Config", + "Speech2Text2Processor", + "Speech2Text2Tokenizer", + ], "models.deprecated.tapex": ["TapexTokenizer"], "models.deprecated.trajectory_transformer": ["TrajectoryTransformerConfig"], "models.deprecated.transfo_xl": [ @@ -339,9 +366,15 @@ _import_structure = { "TransfoXLCorpus", "TransfoXLTokenizer", ], + "models.deprecated.tvlt": [ + "TvltConfig", + "TvltFeatureExtractor", + "TvltProcessor", + ], "models.deprecated.van": ["VanConfig"], + "models.deprecated.vit_hybrid": ["ViTHybridConfig"], + "models.deprecated.xlm_prophetnet": ["XLMProphetNetConfig"], "models.depth_anything": ["DepthAnythingConfig"], - "models.deta": ["DetaConfig"], "models.detr": ["DetrConfig"], "models.dialogpt": [], "models.dinat": ["DinatConfig"], @@ -363,7 +396,6 @@ _import_structure = { "DPRReaderTokenizer", ], "models.dpt": ["DPTConfig"], - "models.efficientformer": ["EfficientFormerConfig"], "models.efficientnet": ["EfficientNetConfig"], "models.electra": [ "ElectraConfig", @@ -375,7 +407,6 @@ _import_structure = { ], "models.encoder_decoder": ["EncoderDecoderConfig"], "models.ernie": ["ErnieConfig"], - "models.ernie_m": ["ErnieMConfig"], "models.esm": ["EsmConfig", "EsmTokenizer"], "models.falcon": ["FalconConfig"], "models.fastspeech2_conformer": [ @@ -420,11 +451,6 @@ _import_structure = { "models.gpt_neox_japanese": ["GPTNeoXJapaneseConfig"], "models.gpt_sw3": [], "models.gptj": ["GPTJConfig"], - "models.gptsan_japanese": [ - "GPTSanJapaneseConfig", - "GPTSanJapaneseTokenizer", - ], - "models.graphormer": ["GraphormerConfig"], "models.grounding_dino": [ "GroundingDinoConfig", "GroundingDinoProcessor", @@ -449,12 +475,6 @@ _import_structure = { ], "models.jamba": ["JambaConfig"], "models.jetmoe": ["JetMoeConfig"], - "models.jukebox": [ - "JukeboxConfig", - "JukeboxPriorConfig", - "JukeboxTokenizer", - "JukeboxVQVAEConfig", - ], "models.kosmos2": [ "Kosmos2Config", "Kosmos2Processor", @@ -519,7 +539,6 @@ _import_structure = { ], "models.mbart": ["MBartConfig"], "models.mbart50": [], - "models.mega": ["MegaConfig"], "models.megatron_bert": ["MegatronBertConfig"], "models.megatron_gpt2": [], "models.mgp_str": [ @@ -554,8 +573,6 @@ _import_structure = { "MusicgenMelodyDecoderConfig", ], "models.mvp": 
["MvpConfig", "MvpTokenizer"], - "models.nat": ["NatConfig"], - "models.nezha": ["NezhaConfig"], "models.nllb": [], "models.nllb_moe": ["NllbMoeConfig"], "models.nougat": ["NougatProcessor"], @@ -613,17 +630,12 @@ _import_structure = { ], "models.pvt": ["PvtConfig"], "models.pvt_v2": ["PvtV2Config"], - "models.qdqbert": ["QDQBertConfig"], "models.qwen2": [ "Qwen2Config", "Qwen2Tokenizer", ], "models.qwen2_moe": ["Qwen2MoeConfig"], "models.rag": ["RagConfig", "RagRetriever", "RagTokenizer"], - "models.realm": [ - "RealmConfig", - "RealmTokenizer", - ], "models.recurrent_gemma": ["RecurrentGemmaConfig"], "models.reformer": ["ReformerConfig"], "models.regnet": ["RegNetConfig"], @@ -672,11 +684,6 @@ _import_structure = { "Speech2TextFeatureExtractor", "Speech2TextProcessor", ], - "models.speech_to_text_2": [ - "Speech2Text2Config", - "Speech2Text2Processor", - "Speech2Text2Tokenizer", - ], "models.speecht5": [ "SpeechT5Config", "SpeechT5FeatureExtractor", @@ -712,11 +719,6 @@ _import_structure = { "TrOCRConfig", "TrOCRProcessor", ], - "models.tvlt": [ - "TvltConfig", - "TvltFeatureExtractor", - "TvltProcessor", - ], "models.tvp": [ "TvpConfig", "TvpProcessor", @@ -749,7 +751,6 @@ _import_structure = { ], "models.visual_bert": ["VisualBertConfig"], "models.vit": ["ViTConfig"], - "models.vit_hybrid": ["ViTHybridConfig"], "models.vit_mae": ["ViTMAEConfig"], "models.vit_msn": ["ViTMSNConfig"], "models.vitdet": ["VitDetConfig"], @@ -788,7 +789,6 @@ _import_structure = { ], "models.xglm": ["XGLMConfig"], "models.xlm": ["XLMConfig", "XLMTokenizer"], - "models.xlm_prophetnet": ["XLMProphetNetConfig"], "models.xlm_roberta": ["XLMRobertaConfig"], "models.xlm_roberta_xl": ["XLMRobertaXLConfig"], "models.xlnet": ["XLNetConfig"], @@ -943,7 +943,8 @@ else: _import_structure["models.code_llama"].append("CodeLlamaTokenizer") _import_structure["models.cpm"].append("CpmTokenizer") _import_structure["models.deberta_v2"].append("DebertaV2Tokenizer") - _import_structure["models.ernie_m"].append("ErnieMTokenizer") + _import_structure["models.deprecated.ernie_m"].append("ErnieMTokenizer") + _import_structure["models.deprecated.xlm_prophetnet"].append("XLMProphetNetTokenizer") _import_structure["models.fnet"].append("FNetTokenizer") _import_structure["models.gemma"].append("GemmaTokenizer") _import_structure["models.gpt_sw3"].append("GPTSw3Tokenizer") @@ -967,7 +968,6 @@ else: _import_structure["models.t5"].append("T5Tokenizer") _import_structure["models.udop"].append("UdopTokenizer") _import_structure["models.xglm"].append("XGLMTokenizer") - _import_structure["models.xlm_prophetnet"].append("XLMProphetNetTokenizer") _import_structure["models.xlm_roberta"].append("XLMRobertaTokenizer") _import_structure["models.xlnet"].append("XLNetTokenizer") @@ -1000,6 +1000,7 @@ else: _import_structure["models.cpm"].append("CpmTokenizerFast") _import_structure["models.deberta"].append("DebertaTokenizerFast") _import_structure["models.deberta_v2"].append("DebertaV2TokenizerFast") + _import_structure["models.deprecated.realm"].append("RealmTokenizerFast") _import_structure["models.deprecated.retribert"].append("RetriBertTokenizerFast") _import_structure["models.distilbert"].append("DistilBertTokenizerFast") _import_structure["models.dpr"].extend( @@ -1037,7 +1038,6 @@ else: _import_structure["models.openai"].append("OpenAIGPTTokenizerFast") _import_structure["models.pegasus"].append("PegasusTokenizerFast") _import_structure["models.qwen2"].append("Qwen2TokenizerFast") - _import_structure["models.realm"].append("RealmTokenizerFast") 
_import_structure["models.reformer"].append("ReformerTokenizerFast") _import_structure["models.rembert"].append("RemBertTokenizerFast") _import_structure["models.roberta"].append("RobertaTokenizerFast") @@ -1122,11 +1122,13 @@ else: ["DeformableDetrFeatureExtractor", "DeformableDetrImageProcessor"] ) _import_structure["models.deit"].extend(["DeiTFeatureExtractor", "DeiTImageProcessor"]) - _import_structure["models.deta"].append("DetaImageProcessor") + _import_structure["models.deprecated.deta"].append("DetaImageProcessor") + _import_structure["models.deprecated.efficientformer"].append("EfficientFormerImageProcessor") + _import_structure["models.deprecated.tvlt"].append("TvltImageProcessor") + _import_structure["models.deprecated.vit_hybrid"].extend(["ViTHybridImageProcessor"]) _import_structure["models.detr"].extend(["DetrFeatureExtractor", "DetrImageProcessor"]) _import_structure["models.donut"].extend(["DonutFeatureExtractor", "DonutImageProcessor"]) _import_structure["models.dpt"].extend(["DPTFeatureExtractor", "DPTImageProcessor"]) - _import_structure["models.efficientformer"].append("EfficientFormerImageProcessor") _import_structure["models.efficientnet"].append("EfficientNetImageProcessor") _import_structure["models.flava"].extend(["FlavaFeatureExtractor", "FlavaImageProcessor", "FlavaProcessor"]) _import_structure["models.fuyu"].extend(["FuyuImageProcessor", "FuyuProcessor"]) @@ -1158,13 +1160,11 @@ else: _import_structure["models.siglip"].append("SiglipImageProcessor") _import_structure["models.superpoint"].extend(["SuperPointImageProcessor"]) _import_structure["models.swin2sr"].append("Swin2SRImageProcessor") - _import_structure["models.tvlt"].append("TvltImageProcessor") _import_structure["models.tvp"].append("TvpImageProcessor") _import_structure["models.video_llava"].append("VideoLlavaImageProcessor") _import_structure["models.videomae"].extend(["VideoMAEFeatureExtractor", "VideoMAEImageProcessor"]) _import_structure["models.vilt"].extend(["ViltFeatureExtractor", "ViltImageProcessor", "ViltProcessor"]) _import_structure["models.vit"].extend(["ViTFeatureExtractor", "ViTImageProcessor"]) - _import_structure["models.vit_hybrid"].extend(["ViTHybridImageProcessor"]) _import_structure["models.vitmatte"].append("VitMatteImageProcessor") _import_structure["models.vivit"].append("VivitImageProcessor") _import_structure["models.yolos"].extend(["YolosFeatureExtractor", "YolosImageProcessor"]) @@ -1767,6 +1767,54 @@ else: "DeiTPreTrainedModel", ] ) + _import_structure["models.deprecated.deta"].extend( + [ + "DetaForObjectDetection", + "DetaModel", + "DetaPreTrainedModel", + ] + ) + _import_structure["models.deprecated.efficientformer"].extend( + [ + "EfficientFormerForImageClassification", + "EfficientFormerForImageClassificationWithTeacher", + "EfficientFormerModel", + "EfficientFormerPreTrainedModel", + ] + ) + _import_structure["models.deprecated.ernie_m"].extend( + [ + "ErnieMForInformationExtraction", + "ErnieMForMultipleChoice", + "ErnieMForQuestionAnswering", + "ErnieMForSequenceClassification", + "ErnieMForTokenClassification", + "ErnieMModel", + "ErnieMPreTrainedModel", + ] + ) + _import_structure["models.deprecated.gptsan_japanese"].extend( + [ + "GPTSanJapaneseForConditionalGeneration", + "GPTSanJapaneseModel", + "GPTSanJapanesePreTrainedModel", + ] + ) + _import_structure["models.deprecated.graphormer"].extend( + [ + "GraphormerForGraphClassification", + "GraphormerModel", + "GraphormerPreTrainedModel", + ] + ) + _import_structure["models.deprecated.jukebox"].extend( + [ + 
"JukeboxModel", + "JukeboxPreTrainedModel", + "JukeboxPrior", + "JukeboxVQVAE", + ] + ) _import_structure["models.deprecated.mctct"].extend( [ "MCTCTForCTC", @@ -1774,7 +1822,40 @@ else: "MCTCTPreTrainedModel", ] ) + _import_structure["models.deprecated.mega"].extend( + [ + "MegaForCausalLM", + "MegaForMaskedLM", + "MegaForMultipleChoice", + "MegaForQuestionAnswering", + "MegaForSequenceClassification", + "MegaForTokenClassification", + "MegaModel", + "MegaPreTrainedModel", + ] + ) _import_structure["models.deprecated.mmbt"].extend(["MMBTForClassification", "MMBTModel", "ModalEmbeddings"]) + _import_structure["models.deprecated.nat"].extend( + [ + "NatBackbone", + "NatForImageClassification", + "NatModel", + "NatPreTrainedModel", + ] + ) + _import_structure["models.deprecated.nezha"].extend( + [ + "NezhaForMaskedLM", + "NezhaForMultipleChoice", + "NezhaForNextSentencePrediction", + "NezhaForPreTraining", + "NezhaForQuestionAnswering", + "NezhaForSequenceClassification", + "NezhaForTokenClassification", + "NezhaModel", + "NezhaPreTrainedModel", + ] + ) _import_structure["models.deprecated.open_llama"].extend( [ "OpenLlamaForCausalLM", @@ -1783,12 +1864,42 @@ else: "OpenLlamaPreTrainedModel", ] ) + _import_structure["models.deprecated.qdqbert"].extend( + [ + "QDQBertForMaskedLM", + "QDQBertForMultipleChoice", + "QDQBertForNextSentencePrediction", + "QDQBertForQuestionAnswering", + "QDQBertForSequenceClassification", + "QDQBertForTokenClassification", + "QDQBertLayer", + "QDQBertLMHeadModel", + "QDQBertModel", + "QDQBertPreTrainedModel", + "load_tf_weights_in_qdqbert", + ] + ) + _import_structure["models.deprecated.realm"].extend( + [ + "RealmEmbedder", + "RealmForOpenQA", + "RealmKnowledgeAugEncoder", + "RealmPreTrainedModel", + "RealmReader", + "RealmRetriever", + "RealmScorer", + "load_tf_weights_in_realm", + ] + ) _import_structure["models.deprecated.retribert"].extend( [ "RetriBertModel", "RetriBertPreTrainedModel", ] ) + _import_structure["models.deprecated.speech_to_text_2"].extend( + ["Speech2Text2ForCausalLM", "Speech2Text2PreTrainedModel"] + ) _import_structure["models.deprecated.trajectory_transformer"].extend( [ "TrajectoryTransformerModel", @@ -1805,6 +1916,14 @@ else: "load_tf_weights_in_transfo_xl", ] ) + _import_structure["models.deprecated.tvlt"].extend( + [ + "TvltForAudioVisualClassification", + "TvltForPreTraining", + "TvltModel", + "TvltPreTrainedModel", + ] + ) _import_structure["models.deprecated.van"].extend( [ "VanForImageClassification", @@ -1812,19 +1931,29 @@ else: "VanPreTrainedModel", ] ) + _import_structure["models.deprecated.vit_hybrid"].extend( + [ + "ViTHybridForImageClassification", + "ViTHybridModel", + "ViTHybridPreTrainedModel", + ] + ) + _import_structure["models.deprecated.xlm_prophetnet"].extend( + [ + "XLMProphetNetDecoder", + "XLMProphetNetEncoder", + "XLMProphetNetForCausalLM", + "XLMProphetNetForConditionalGeneration", + "XLMProphetNetModel", + "XLMProphetNetPreTrainedModel", + ] + ) _import_structure["models.depth_anything"].extend( [ "DepthAnythingForDepthEstimation", "DepthAnythingPreTrainedModel", ] ) - _import_structure["models.deta"].extend( - [ - "DetaForObjectDetection", - "DetaModel", - "DetaPreTrainedModel", - ] - ) _import_structure["models.detr"].extend( [ "DetrForObjectDetection", @@ -1885,14 +2014,6 @@ else: "DPTPreTrainedModel", ] ) - _import_structure["models.efficientformer"].extend( - [ - "EfficientFormerForImageClassification", - "EfficientFormerForImageClassificationWithTeacher", - "EfficientFormerModel", - 
"EfficientFormerPreTrainedModel", - ] - ) _import_structure["models.efficientnet"].extend( [ "EfficientNetForImageClassification", @@ -1935,17 +2056,6 @@ else: "ErniePreTrainedModel", ] ) - _import_structure["models.ernie_m"].extend( - [ - "ErnieMForInformationExtraction", - "ErnieMForMultipleChoice", - "ErnieMForQuestionAnswering", - "ErnieMForSequenceClassification", - "ErnieMForTokenClassification", - "ErnieMModel", - "ErnieMPreTrainedModel", - ] - ) _import_structure["models.esm"].extend( [ "EsmFoldPreTrainedModel", @@ -2121,20 +2231,6 @@ else: "GPTJPreTrainedModel", ] ) - _import_structure["models.gptsan_japanese"].extend( - [ - "GPTSanJapaneseForConditionalGeneration", - "GPTSanJapaneseModel", - "GPTSanJapanesePreTrainedModel", - ] - ) - _import_structure["models.graphormer"].extend( - [ - "GraphormerForGraphClassification", - "GraphormerModel", - "GraphormerPreTrainedModel", - ] - ) _import_structure["models.grounding_dino"].extend( [ "GroundingDinoForObjectDetection", @@ -2225,14 +2321,6 @@ else: "JetMoePreTrainedModel", ] ) - _import_structure["models.jukebox"].extend( - [ - "JukeboxModel", - "JukeboxPreTrainedModel", - "JukeboxPrior", - "JukeboxVQVAE", - ] - ) _import_structure["models.kosmos2"].extend( [ "Kosmos2ForConditionalGeneration", @@ -2410,18 +2498,6 @@ else: "MBartPreTrainedModel", ] ) - _import_structure["models.mega"].extend( - [ - "MegaForCausalLM", - "MegaForMaskedLM", - "MegaForMultipleChoice", - "MegaForQuestionAnswering", - "MegaForSequenceClassification", - "MegaForTokenClassification", - "MegaModel", - "MegaPreTrainedModel", - ] - ) _import_structure["models.megatron_bert"].extend( [ "MegatronBertForCausalLM", @@ -2580,27 +2656,6 @@ else: "MvpPreTrainedModel", ] ) - _import_structure["models.nat"].extend( - [ - "NatBackbone", - "NatForImageClassification", - "NatModel", - "NatPreTrainedModel", - ] - ) - _import_structure["models.nezha"].extend( - [ - "NezhaForMaskedLM", - "NezhaForMultipleChoice", - "NezhaForNextSentencePrediction", - "NezhaForPreTraining", - "NezhaForQuestionAnswering", - "NezhaForSequenceClassification", - "NezhaForTokenClassification", - "NezhaModel", - "NezhaPreTrainedModel", - ] - ) _import_structure["models.nllb_moe"].extend( [ "NllbMoeForConditionalGeneration", @@ -2811,21 +2866,6 @@ else: "PvtV2PreTrainedModel", ] ) - _import_structure["models.qdqbert"].extend( - [ - "QDQBertForMaskedLM", - "QDQBertForMultipleChoice", - "QDQBertForNextSentencePrediction", - "QDQBertForQuestionAnswering", - "QDQBertForSequenceClassification", - "QDQBertForTokenClassification", - "QDQBertLayer", - "QDQBertLMHeadModel", - "QDQBertModel", - "QDQBertPreTrainedModel", - "load_tf_weights_in_qdqbert", - ] - ) _import_structure["models.qwen2"].extend( [ "Qwen2ForCausalLM", @@ -2852,18 +2892,6 @@ else: "RagTokenForGeneration", ] ) - _import_structure["models.realm"].extend( - [ - "RealmEmbedder", - "RealmForOpenQA", - "RealmKnowledgeAugEncoder", - "RealmPreTrainedModel", - "RealmReader", - "RealmRetriever", - "RealmScorer", - "load_tf_weights_in_realm", - ] - ) _import_structure["models.recurrent_gemma"].extend( [ "RecurrentGemmaForCausalLM", @@ -3052,7 +3080,6 @@ else: "Speech2TextPreTrainedModel", ] ) - _import_structure["models.speech_to_text_2"].extend(["Speech2Text2ForCausalLM", "Speech2Text2PreTrainedModel"]) _import_structure["models.speecht5"].extend( [ "SpeechT5ForSpeechToSpeech", @@ -3200,14 +3227,6 @@ else: "TrOCRPreTrainedModel", ] ) - _import_structure["models.tvlt"].extend( - [ - "TvltForAudioVisualClassification", - "TvltForPreTraining", - 
"TvltModel", - "TvltPreTrainedModel", - ] - ) _import_structure["models.tvp"].extend( [ "TvpForVideoGrounding", @@ -3320,13 +3339,6 @@ else: "ViTPreTrainedModel", ] ) - _import_structure["models.vit_hybrid"].extend( - [ - "ViTHybridForImageClassification", - "ViTHybridModel", - "ViTHybridPreTrainedModel", - ] - ) _import_structure["models.vit_mae"].extend( [ "ViTMAEForPreTraining", @@ -3447,16 +3459,6 @@ else: "XLMWithLMHeadModel", ] ) - _import_structure["models.xlm_prophetnet"].extend( - [ - "XLMProphetNetDecoder", - "XLMProphetNetEncoder", - "XLMProphetNetForCausalLM", - "XLMProphetNetForConditionalGeneration", - "XLMProphetNetModel", - "XLMProphetNetPreTrainedModel", - ] - ) _import_structure["models.xlm_roberta"].extend( [ "XLMRobertaForCausalLM", @@ -3799,6 +3801,14 @@ else: "TFDeiTPreTrainedModel", ] ) + _import_structure["models.deprecated.efficientformer"].extend( + [ + "TFEfficientFormerForImageClassification", + "TFEfficientFormerForImageClassificationWithTeacher", + "TFEfficientFormerModel", + "TFEfficientFormerPreTrainedModel", + ] + ) _import_structure["models.deprecated.transfo_xl"].extend( [ "TFAdaptiveEmbedding", @@ -3831,14 +3841,6 @@ else: "TFDPRReader", ] ) - _import_structure["models.efficientformer"].extend( - [ - "TFEfficientFormerForImageClassification", - "TFEfficientFormerForImageClassificationWithTeacher", - "TFEfficientFormerModel", - "TFEfficientFormerPreTrainedModel", - ] - ) _import_structure["models.electra"].extend( [ "TFElectraForMaskedLM", @@ -4888,19 +4890,48 @@ if TYPE_CHECKING: DeformableDetrConfig, ) from .models.deit import DeiTConfig + from .models.deprecated.deta import DetaConfig + from .models.deprecated.efficientformer import ( + EfficientFormerConfig, + ) + from .models.deprecated.ernie_m import ErnieMConfig + from .models.deprecated.gptsan_japanese import ( + GPTSanJapaneseConfig, + GPTSanJapaneseTokenizer, + ) + from .models.deprecated.graphormer import GraphormerConfig + from .models.deprecated.jukebox import ( + JukeboxConfig, + JukeboxPriorConfig, + JukeboxTokenizer, + JukeboxVQVAEConfig, + ) from .models.deprecated.mctct import ( MCTCTConfig, MCTCTFeatureExtractor, MCTCTProcessor, ) + from .models.deprecated.mega import MegaConfig from .models.deprecated.mmbt import MMBTConfig + from .models.deprecated.nat import NatConfig + from .models.deprecated.nezha import NezhaConfig from .models.deprecated.open_llama import ( OpenLlamaConfig, ) + from .models.deprecated.qdqbert import QDQBertConfig + from .models.deprecated.realm import ( + RealmConfig, + RealmTokenizer, + ) from .models.deprecated.retribert import ( RetriBertConfig, RetriBertTokenizer, ) + from .models.deprecated.speech_to_text_2 import ( + Speech2Text2Config, + Speech2Text2Processor, + Speech2Text2Tokenizer, + ) from .models.deprecated.tapex import TapexTokenizer from .models.deprecated.trajectory_transformer import ( TrajectoryTransformerConfig, @@ -4910,9 +4941,19 @@ if TYPE_CHECKING: TransfoXLCorpus, TransfoXLTokenizer, ) + from .models.deprecated.tvlt import ( + TvltConfig, + TvltFeatureExtractor, + TvltProcessor, + ) from .models.deprecated.van import VanConfig + from .models.deprecated.vit_hybrid import ( + ViTHybridConfig, + ) + from .models.deprecated.xlm_prophetnet import ( + XLMProphetNetConfig, + ) from .models.depth_anything import DepthAnythingConfig - from .models.deta import DetaConfig from .models.detr import DetrConfig from .models.dinat import DinatConfig from .models.dinov2 import Dinov2Config @@ -4932,9 +4973,6 @@ if TYPE_CHECKING: DPRReaderTokenizer, ) from 
.models.dpt import DPTConfig - from .models.efficientformer import ( - EfficientFormerConfig, - ) from .models.efficientnet import ( EfficientNetConfig, ) @@ -4948,7 +4986,6 @@ if TYPE_CHECKING: ) from .models.encoder_decoder import EncoderDecoderConfig from .models.ernie import ErnieConfig - from .models.ernie_m import ErnieMConfig from .models.esm import EsmConfig, EsmTokenizer from .models.falcon import FalconConfig from .models.fastspeech2_conformer import ( @@ -4996,11 +5033,6 @@ if TYPE_CHECKING: GPTNeoXJapaneseConfig, ) from .models.gptj import GPTJConfig - from .models.gptsan_japanese import ( - GPTSanJapaneseConfig, - GPTSanJapaneseTokenizer, - ) - from .models.graphormer import GraphormerConfig from .models.grounding_dino import ( GroundingDinoConfig, GroundingDinoProcessor, @@ -5027,12 +5059,6 @@ if TYPE_CHECKING: ) from .models.jamba import JambaConfig from .models.jetmoe import JetMoeConfig - from .models.jukebox import ( - JukeboxConfig, - JukeboxPriorConfig, - JukeboxTokenizer, - JukeboxVQVAEConfig, - ) from .models.kosmos2 import ( Kosmos2Config, Kosmos2Processor, @@ -5098,7 +5124,6 @@ if TYPE_CHECKING: MaskFormerSwinConfig, ) from .models.mbart import MBartConfig - from .models.mega import MegaConfig from .models.megatron_bert import ( MegatronBertConfig, ) @@ -5141,8 +5166,6 @@ if TYPE_CHECKING: MusicgenMelodyDecoderConfig, ) from .models.mvp import MvpConfig, MvpTokenizer - from .models.nat import NatConfig - from .models.nezha import NezhaConfig from .models.nllb_moe import NllbMoeConfig from .models.nougat import NougatProcessor from .models.nystromformer import ( @@ -5213,14 +5236,9 @@ if TYPE_CHECKING: ) from .models.pvt import PvtConfig from .models.pvt_v2 import PvtV2Config - from .models.qdqbert import QDQBertConfig from .models.qwen2 import Qwen2Config, Qwen2Tokenizer from .models.qwen2_moe import Qwen2MoeConfig from .models.rag import RagConfig, RagRetriever, RagTokenizer - from .models.realm import ( - RealmConfig, - RealmTokenizer, - ) from .models.recurrent_gemma import RecurrentGemmaConfig from .models.reformer import ReformerConfig from .models.regnet import RegNetConfig @@ -5273,11 +5291,6 @@ if TYPE_CHECKING: Speech2TextFeatureExtractor, Speech2TextProcessor, ) - from .models.speech_to_text_2 import ( - Speech2Text2Config, - Speech2Text2Processor, - Speech2Text2Tokenizer, - ) from .models.speecht5 import ( SpeechT5Config, SpeechT5FeatureExtractor, @@ -5323,11 +5336,6 @@ if TYPE_CHECKING: TrOCRConfig, TrOCRProcessor, ) - from .models.tvlt import ( - TvltConfig, - TvltFeatureExtractor, - TvltProcessor, - ) from .models.tvp import ( TvpConfig, TvpProcessor, @@ -5365,9 +5373,6 @@ if TYPE_CHECKING: VisualBertConfig, ) from .models.vit import ViTConfig - from .models.vit_hybrid import ( - ViTHybridConfig, - ) from .models.vit_mae import ViTMAEConfig from .models.vit_msn import ViTMSNConfig from .models.vitdet import VitDetConfig @@ -5408,9 +5413,6 @@ if TYPE_CHECKING: ) from .models.xglm import XGLMConfig from .models.xlm import XLMConfig, XLMTokenizer - from .models.xlm_prophetnet import ( - XLMProphetNetConfig, - ) from .models.xlm_roberta import ( XLMRobertaConfig, ) @@ -5570,7 +5572,8 @@ if TYPE_CHECKING: from .models.code_llama import CodeLlamaTokenizer from .models.cpm import CpmTokenizer from .models.deberta_v2 import DebertaV2Tokenizer - from .models.ernie_m import ErnieMTokenizer + from .models.deprecated.ernie_m import ErnieMTokenizer + from .models.deprecated.xlm_prophetnet import XLMProphetNetTokenizer from .models.fnet import FNetTokenizer from 
.models.gemma import GemmaTokenizer from .models.gpt_sw3 import GPTSw3Tokenizer @@ -5593,7 +5596,6 @@ if TYPE_CHECKING: from .models.t5 import T5Tokenizer from .models.udop import UdopTokenizer from .models.xglm import XGLMTokenizer - from .models.xlm_prophetnet import XLMProphetNetTokenizer from .models.xlm_roberta import XLMRobertaTokenizer from .models.xlnet import XLNetTokenizer @@ -5621,6 +5623,7 @@ if TYPE_CHECKING: from .models.cpm import CpmTokenizerFast from .models.deberta import DebertaTokenizerFast from .models.deberta_v2 import DebertaV2TokenizerFast + from .models.deprecated.realm import RealmTokenizerFast from .models.deprecated.retribert import RetriBertTokenizerFast from .models.distilbert import DistilBertTokenizerFast from .models.dpr import ( @@ -5656,7 +5659,6 @@ if TYPE_CHECKING: from .models.openai import OpenAIGPTTokenizerFast from .models.pegasus import PegasusTokenizerFast from .models.qwen2 import Qwen2TokenizerFast - from .models.realm import RealmTokenizerFast from .models.reformer import ReformerTokenizerFast from .models.rembert import RemBertTokenizerFast from .models.roberta import RobertaTokenizerFast @@ -5726,11 +5728,13 @@ if TYPE_CHECKING: DeformableDetrImageProcessor, ) from .models.deit import DeiTFeatureExtractor, DeiTImageProcessor - from .models.deta import DetaImageProcessor + from .models.deprecated.deta import DetaImageProcessor + from .models.deprecated.efficientformer import EfficientFormerImageProcessor + from .models.deprecated.tvlt import TvltImageProcessor + from .models.deprecated.vit_hybrid import ViTHybridImageProcessor from .models.detr import DetrFeatureExtractor, DetrImageProcessor from .models.donut import DonutFeatureExtractor, DonutImageProcessor from .models.dpt import DPTFeatureExtractor, DPTImageProcessor - from .models.efficientformer import EfficientFormerImageProcessor from .models.efficientnet import EfficientNetImageProcessor from .models.flava import ( FlavaFeatureExtractor, @@ -5784,13 +5788,11 @@ if TYPE_CHECKING: from .models.siglip import SiglipImageProcessor from .models.superpoint import SuperPointImageProcessor from .models.swin2sr import Swin2SRImageProcessor - from .models.tvlt import TvltImageProcessor from .models.tvp import TvpImageProcessor from .models.video_llava import VideoLlavaImageProcessor from .models.videomae import VideoMAEFeatureExtractor, VideoMAEImageProcessor from .models.vilt import ViltFeatureExtractor, ViltImageProcessor, ViltProcessor from .models.vit import ViTFeatureExtractor, ViTImageProcessor - from .models.vit_hybrid import ViTHybridImageProcessor from .models.vitmatte import VitMatteImageProcessor from .models.vivit import VivitImageProcessor from .models.yolos import YolosFeatureExtractor, YolosImageProcessor @@ -6300,26 +6302,116 @@ if TYPE_CHECKING: DeiTModel, DeiTPreTrainedModel, ) + from .models.deprecated.deta import ( + DetaForObjectDetection, + DetaModel, + DetaPreTrainedModel, + ) + from .models.deprecated.efficientformer import ( + EfficientFormerForImageClassification, + EfficientFormerForImageClassificationWithTeacher, + EfficientFormerModel, + EfficientFormerPreTrainedModel, + ) + from .models.deprecated.ernie_m import ( + ErnieMForInformationExtraction, + ErnieMForMultipleChoice, + ErnieMForQuestionAnswering, + ErnieMForSequenceClassification, + ErnieMForTokenClassification, + ErnieMModel, + ErnieMPreTrainedModel, + ) + from .models.deprecated.gptsan_japanese import ( + GPTSanJapaneseForConditionalGeneration, + GPTSanJapaneseModel, + GPTSanJapanesePreTrainedModel, + ) + 
from .models.deprecated.graphormer import ( + GraphormerForGraphClassification, + GraphormerModel, + GraphormerPreTrainedModel, + ) + from .models.deprecated.jukebox import ( + JukeboxModel, + JukeboxPreTrainedModel, + JukeboxPrior, + JukeboxVQVAE, + ) from .models.deprecated.mctct import ( MCTCTForCTC, MCTCTModel, MCTCTPreTrainedModel, ) + from .models.deprecated.mega import ( + MegaForCausalLM, + MegaForMaskedLM, + MegaForMultipleChoice, + MegaForQuestionAnswering, + MegaForSequenceClassification, + MegaForTokenClassification, + MegaModel, + MegaPreTrainedModel, + ) from .models.deprecated.mmbt import ( MMBTForClassification, MMBTModel, ModalEmbeddings, ) + from .models.deprecated.nat import ( + NatBackbone, + NatForImageClassification, + NatModel, + NatPreTrainedModel, + ) + from .models.deprecated.nezha import ( + NezhaForMaskedLM, + NezhaForMultipleChoice, + NezhaForNextSentencePrediction, + NezhaForPreTraining, + NezhaForQuestionAnswering, + NezhaForSequenceClassification, + NezhaForTokenClassification, + NezhaModel, + NezhaPreTrainedModel, + ) from .models.deprecated.open_llama import ( OpenLlamaForCausalLM, OpenLlamaForSequenceClassification, OpenLlamaModel, OpenLlamaPreTrainedModel, ) + from .models.deprecated.qdqbert import ( + QDQBertForMaskedLM, + QDQBertForMultipleChoice, + QDQBertForNextSentencePrediction, + QDQBertForQuestionAnswering, + QDQBertForSequenceClassification, + QDQBertForTokenClassification, + QDQBertLayer, + QDQBertLMHeadModel, + QDQBertModel, + QDQBertPreTrainedModel, + load_tf_weights_in_qdqbert, + ) + from .models.deprecated.realm import ( + RealmEmbedder, + RealmForOpenQA, + RealmKnowledgeAugEncoder, + RealmPreTrainedModel, + RealmReader, + RealmRetriever, + RealmScorer, + load_tf_weights_in_realm, + ) from .models.deprecated.retribert import ( RetriBertModel, RetriBertPreTrainedModel, ) + from .models.deprecated.speech_to_text_2 import ( + Speech2Text2ForCausalLM, + Speech2Text2PreTrainedModel, + ) from .models.deprecated.trajectory_transformer import ( TrajectoryTransformerModel, TrajectoryTransformerPreTrainedModel, @@ -6332,20 +6424,34 @@ if TYPE_CHECKING: TransfoXLPreTrainedModel, load_tf_weights_in_transfo_xl, ) + from .models.deprecated.tvlt import ( + TvltForAudioVisualClassification, + TvltForPreTraining, + TvltModel, + TvltPreTrainedModel, + ) from .models.deprecated.van import ( VanForImageClassification, VanModel, VanPreTrainedModel, ) + from .models.deprecated.vit_hybrid import ( + ViTHybridForImageClassification, + ViTHybridModel, + ViTHybridPreTrainedModel, + ) + from .models.deprecated.xlm_prophetnet import ( + XLMProphetNetDecoder, + XLMProphetNetEncoder, + XLMProphetNetForCausalLM, + XLMProphetNetForConditionalGeneration, + XLMProphetNetModel, + XLMProphetNetPreTrainedModel, + ) from .models.depth_anything import ( DepthAnythingForDepthEstimation, DepthAnythingPreTrainedModel, ) - from .models.deta import ( - DetaForObjectDetection, - DetaModel, - DetaPreTrainedModel, - ) from .models.detr import ( DetrForObjectDetection, DetrForSegmentation, @@ -6392,12 +6498,6 @@ if TYPE_CHECKING: DPTModel, DPTPreTrainedModel, ) - from .models.efficientformer import ( - EfficientFormerForImageClassification, - EfficientFormerForImageClassificationWithTeacher, - EfficientFormerModel, - EfficientFormerPreTrainedModel, - ) from .models.efficientnet import ( EfficientNetForImageClassification, EfficientNetModel, @@ -6432,15 +6532,6 @@ if TYPE_CHECKING: ErnieModel, ErniePreTrainedModel, ) - from .models.ernie_m import ( - ErnieMForInformationExtraction, - 
ErnieMForMultipleChoice, - ErnieMForQuestionAnswering, - ErnieMForSequenceClassification, - ErnieMForTokenClassification, - ErnieMModel, - ErnieMPreTrainedModel, - ) from .models.esm import ( EsmFoldPreTrainedModel, EsmForMaskedLM, @@ -6589,16 +6680,6 @@ if TYPE_CHECKING: GPTJModel, GPTJPreTrainedModel, ) - from .models.gptsan_japanese import ( - GPTSanJapaneseForConditionalGeneration, - GPTSanJapaneseModel, - GPTSanJapanesePreTrainedModel, - ) - from .models.graphormer import ( - GraphormerForGraphClassification, - GraphormerModel, - GraphormerPreTrainedModel, - ) from .models.grounding_dino import ( GroundingDinoForObjectDetection, GroundingDinoModel, @@ -6667,12 +6748,6 @@ if TYPE_CHECKING: JetMoeModel, JetMoePreTrainedModel, ) - from .models.jukebox import ( - JukeboxModel, - JukeboxPreTrainedModel, - JukeboxPrior, - JukeboxVQVAE, - ) from .models.kosmos2 import ( Kosmos2ForConditionalGeneration, Kosmos2Model, @@ -6810,16 +6885,6 @@ if TYPE_CHECKING: MBartModel, MBartPreTrainedModel, ) - from .models.mega import ( - MegaForCausalLM, - MegaForMaskedLM, - MegaForMultipleChoice, - MegaForQuestionAnswering, - MegaForSequenceClassification, - MegaForTokenClassification, - MegaModel, - MegaPreTrainedModel, - ) from .models.megatron_bert import ( MegatronBertForCausalLM, MegatronBertForMaskedLM, @@ -6946,23 +7011,6 @@ if TYPE_CHECKING: MvpModel, MvpPreTrainedModel, ) - from .models.nat import ( - NatBackbone, - NatForImageClassification, - NatModel, - NatPreTrainedModel, - ) - from .models.nezha import ( - NezhaForMaskedLM, - NezhaForMultipleChoice, - NezhaForNextSentencePrediction, - NezhaForPreTraining, - NezhaForQuestionAnswering, - NezhaForSequenceClassification, - NezhaForTokenClassification, - NezhaModel, - NezhaPreTrainedModel, - ) from .models.nllb_moe import ( NllbMoeForConditionalGeneration, NllbMoeModel, @@ -7125,19 +7173,6 @@ if TYPE_CHECKING: PvtV2Model, PvtV2PreTrainedModel, ) - from .models.qdqbert import ( - QDQBertForMaskedLM, - QDQBertForMultipleChoice, - QDQBertForNextSentencePrediction, - QDQBertForQuestionAnswering, - QDQBertForSequenceClassification, - QDQBertForTokenClassification, - QDQBertLayer, - QDQBertLMHeadModel, - QDQBertModel, - QDQBertPreTrainedModel, - load_tf_weights_in_qdqbert, - ) from .models.qwen2 import ( Qwen2ForCausalLM, Qwen2ForSequenceClassification, @@ -7158,16 +7193,6 @@ if TYPE_CHECKING: RagSequenceForGeneration, RagTokenForGeneration, ) - from .models.realm import ( - RealmEmbedder, - RealmForOpenQA, - RealmKnowledgeAugEncoder, - RealmPreTrainedModel, - RealmReader, - RealmRetriever, - RealmScorer, - load_tf_weights_in_realm, - ) from .models.recurrent_gemma import ( RecurrentGemmaForCausalLM, RecurrentGemmaModel, @@ -7318,10 +7343,6 @@ if TYPE_CHECKING: Speech2TextModel, Speech2TextPreTrainedModel, ) - from .models.speech_to_text_2 import ( - Speech2Text2ForCausalLM, - Speech2Text2PreTrainedModel, - ) from .models.speecht5 import ( SpeechT5ForSpeechToSpeech, SpeechT5ForSpeechToText, @@ -7435,12 +7456,6 @@ if TYPE_CHECKING: TrOCRForCausalLM, TrOCRPreTrainedModel, ) - from .models.tvlt import ( - TvltForAudioVisualClassification, - TvltForPreTraining, - TvltModel, - TvltPreTrainedModel, - ) from .models.tvp import ( TvpForVideoGrounding, TvpModel, @@ -7525,11 +7540,6 @@ if TYPE_CHECKING: ViTModel, ViTPreTrainedModel, ) - from .models.vit_hybrid import ( - ViTHybridForImageClassification, - ViTHybridModel, - ViTHybridPreTrainedModel, - ) from .models.vit_mae import ( ViTMAEForPreTraining, ViTMAELayer, @@ -7622,14 +7632,6 @@ if TYPE_CHECKING: 
XLMPreTrainedModel, XLMWithLMHeadModel, ) - from .models.xlm_prophetnet import ( - XLMProphetNetDecoder, - XLMProphetNetEncoder, - XLMProphetNetForCausalLM, - XLMProphetNetForConditionalGeneration, - XLMProphetNetModel, - XLMProphetNetPreTrainedModel, - ) from .models.xlm_roberta import ( XLMRobertaForCausalLM, XLMRobertaForMaskedLM, @@ -7921,6 +7923,12 @@ if TYPE_CHECKING: TFDeiTModel, TFDeiTPreTrainedModel, ) + from .models.deprecated.efficientformer import ( + TFEfficientFormerForImageClassification, + TFEfficientFormerForImageClassificationWithTeacher, + TFEfficientFormerModel, + TFEfficientFormerPreTrainedModel, + ) from .models.deprecated.transfo_xl import ( TFAdaptiveEmbedding, TFTransfoXLForSequenceClassification, @@ -7947,12 +7955,6 @@ if TYPE_CHECKING: TFDPRQuestionEncoder, TFDPRReader, ) - from .models.efficientformer import ( - TFEfficientFormerForImageClassification, - TFEfficientFormerForImageClassificationWithTeacher, - TFEfficientFormerModel, - TFEfficientFormerPreTrainedModel, - ) from .models.electra import ( TFElectraForMaskedLM, TFElectraForMultipleChoice, diff --git a/src/transformers/models/__init__.py b/src/transformers/models/__init__.py index 72e2d892ec..24b602f18c 100644 --- a/src/transformers/models/__init__.py +++ b/src/transformers/models/__init__.py @@ -67,7 +67,6 @@ from . import ( deit, deprecated, depth_anything, - deta, detr, dialogpt, dinat, @@ -77,13 +76,11 @@ from . import ( donut, dpr, dpt, - efficientformer, efficientnet, electra, encodec, encoder_decoder, ernie, - ernie_m, esm, falcon, fastspeech2_conformer, @@ -104,8 +101,6 @@ from . import ( gpt_neox_japanese, gpt_sw3, gptj, - gptsan_japanese, - graphormer, grounding_dino, groupvit, herbert, @@ -118,7 +113,6 @@ from . import ( instructblip, jamba, jetmoe, - jukebox, kosmos2, layoutlm, layoutlmv2, @@ -142,7 +136,6 @@ from . import ( maskformer, mbart, mbart50, - mega, megatron_bert, megatron_gpt2, mgp_str, @@ -161,8 +154,6 @@ from . import ( musicgen, musicgen_melody, mvp, - nat, - nezha, nllb, nllb_moe, nougat, @@ -190,11 +181,9 @@ from . import ( prophetnet, pvt, pvt_v2, - qdqbert, qwen2, qwen2_moe, rag, - realm, recurrent_gemma, reformer, regnet, @@ -215,7 +204,6 @@ from . import ( siglip, speech_encoder_decoder, speech_to_text, - speech_to_text_2, speecht5, splinter, squeezebert, @@ -234,7 +222,6 @@ from . import ( timesformer, timm_backbone, trocr, - tvlt, tvp, udop, umt5, @@ -250,7 +237,6 @@ from . import ( vision_text_dual_encoder, visual_bert, vit, - vit_hybrid, vit_mae, vit_msn, vitdet, @@ -267,7 +253,6 @@ from . import ( x_clip, xglm, xlm, - xlm_prophetnet, xlm_roberta, xlm_roberta_xl, xlnet, diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py index 464f80e2d9..40e282166e 100755 --- a/src/transformers/models/auto/configuration_auto.py +++ b/src/transformers/models/auto/configuration_auto.py @@ -585,14 +585,29 @@ MODEL_NAMES_MAPPING = OrderedDict( # `transfo-xl` (as in `CONFIG_MAPPING_NAMES`), we should use `transfo_xl`. 
 DEPRECATED_MODELS = [
     "bort",
+    "deta",
+    "efficientformer",
+    "ernie_m",
+    "gptsan_japanese",
+    "graphormer",
+    "jukebox",
     "mctct",
+    "mega",
     "mmbt",
+    "nat",
+    "nezha",
     "open_llama",
+    "qdqbert",
+    "realm",
     "retribert",
+    "speech_to_text_2",
     "tapex",
     "trajectory_transformer",
     "transfo_xl",
+    "tvlt",
     "van",
+    "vit_hybrid",
+    "xlm_prophetnet",
 ]
 
 SPECIAL_MODEL_TYPE_TO_MODULE_NAME = OrderedDict(
@@ -616,7 +631,11 @@ def model_type_to_module_name(key):
     """Converts a config key to the corresponding module."""
     # Special treatment
     if key in SPECIAL_MODEL_TYPE_TO_MODULE_NAME:
-        return SPECIAL_MODEL_TYPE_TO_MODULE_NAME[key]
+        key = SPECIAL_MODEL_TYPE_TO_MODULE_NAME[key]
+
+        if key in DEPRECATED_MODELS:
+            key = f"deprecated.{key}"
+        return key
 
     key = key.replace("-", "_")
     if key in DEPRECATED_MODELS:
diff --git a/src/transformers/models/deta/__init__.py b/src/transformers/models/deprecated/deta/__init__.py
similarity index 94%
rename from src/transformers/models/deta/__init__.py
rename to src/transformers/models/deprecated/deta/__init__.py
index 843a4dc4d8..ab54ec6f43 100644
--- a/src/transformers/models/deta/__init__.py
+++ b/src/transformers/models/deprecated/deta/__init__.py
@@ -14,7 +14,7 @@ from typing import TYPE_CHECKING
 
-from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available
+from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available
 
 
 _import_structure = {
diff --git a/src/transformers/models/deta/configuration_deta.py b/src/transformers/models/deprecated/deta/configuration_deta.py
similarity index 99%
rename from src/transformers/models/deta/configuration_deta.py
rename to src/transformers/models/deprecated/deta/configuration_deta.py
index d7fe7eadc7..fcee8fc62a 100644
--- a/src/transformers/models/deta/configuration_deta.py
+++ b/src/transformers/models/deprecated/deta/configuration_deta.py
@@ -14,9 +14,9 @@
 # limitations under the License.
 """DETA model configuration"""
 
-from ...configuration_utils import PretrainedConfig
-from ...utils import logging
-from ..auto import CONFIG_MAPPING
+from ....configuration_utils import PretrainedConfig
+from ....utils import logging
+from ...auto import CONFIG_MAPPING
 
 
 logger = logging.get_logger(__name__)
diff --git a/src/transformers/models/deta/convert_deta_resnet_to_pytorch.py b/src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py
similarity index 100%
rename from src/transformers/models/deta/convert_deta_resnet_to_pytorch.py
rename to src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py
diff --git a/src/transformers/models/deta/convert_deta_swin_to_pytorch.py b/src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py
similarity index 100%
rename from src/transformers/models/deta/convert_deta_swin_to_pytorch.py
rename to src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py
diff --git a/src/transformers/models/deta/image_processing_deta.py b/src/transformers/models/deprecated/deta/image_processing_deta.py
similarity index 99%
rename from src/transformers/models/deta/image_processing_deta.py
rename to src/transformers/models/deprecated/deta/image_processing_deta.py
index a73eedba2c..57a9584397 100644
--- a/src/transformers/models/deta/image_processing_deta.py
+++ b/src/transformers/models/deprecated/deta/image_processing_deta.py
@@ -19,9 +19,9 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
 
 import numpy as np
 
-from ...feature_extraction_utils import BatchFeature
-from ...image_processing_utils import BaseImageProcessor, get_size_dict
-from ...image_transforms import (
+from ....feature_extraction_utils import BatchFeature
+from ....image_processing_utils import BaseImageProcessor, get_size_dict
+from ....image_transforms import (
     PaddingMode,
     center_to_corners_format,
     corners_to_center_format,
@@ -31,7 +31,7 @@ from ...image_transforms import (
     rgb_to_id,
     to_channel_dimension_format,
 )
-from ...image_utils import (
+from ....image_utils import (
     IMAGENET_DEFAULT_MEAN,
     IMAGENET_DEFAULT_STD,
     AnnotationFormat,
@@ -48,7 +48,7 @@ from ...image_utils import (
     validate_annotations,
     validate_preprocess_arguments,
 )
-from ...utils import (
+from ....utils import (
     is_flax_available,
     is_jax_tensor,
     is_tf_available,
@@ -59,7 +59,7 @@ from ...utils import (
     is_vision_available,
     logging,
 )
-from ...utils.generic import TensorType
+from ....utils.generic import TensorType
 
 
 if is_torch_available():
diff --git a/src/transformers/models/deta/modeling_deta.py b/src/transformers/models/deprecated/deta/modeling_deta.py
similarity index 99%
rename from src/transformers/models/deta/modeling_deta.py
rename to src/transformers/models/deprecated/deta/modeling_deta.py
index fcd8fd82b6..03341f0ab8 100644
--- a/src/transformers/models/deta/modeling_deta.py
+++ b/src/transformers/models/deprecated/deta/modeling_deta.py
@@ -28,8 +28,8 @@ from torch import Tensor, nn
 from torch.autograd import Function
 from torch.autograd.function import once_differentiable
 
-from ...activations import ACT2FN
-from ...file_utils import (
+from ....activations import ACT2FN
+from ....file_utils import (
     ModelOutput,
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
@@ -38,12 +38,12 @@ from ...file_utils import (
     is_vision_available,
     replace_return_docstrings,
 )
-from ...modeling_attn_mask_utils import _prepare_4d_attention_mask
-from ...modeling_outputs import BaseModelOutput
-from ...modeling_utils import
PreTrainedModel -from ...pytorch_utils import meshgrid -from ...utils import is_accelerate_available, is_ninja_available, is_torchvision_available, logging, requires_backends -from ...utils.backbone_utils import load_backbone +from ....modeling_attn_mask_utils import _prepare_4d_attention_mask +from ....modeling_outputs import BaseModelOutput +from ....modeling_utils import PreTrainedModel +from ....pytorch_utils import meshgrid +from ....utils import is_accelerate_available, is_ninja_available, is_torchvision_available, logging, requires_backends +from ....utils.backbone_utils import load_backbone from .configuration_deta import DetaConfig diff --git a/src/transformers/models/efficientformer/__init__.py b/src/transformers/models/deprecated/efficientformer/__init__.py similarity index 99% rename from src/transformers/models/efficientformer/__init__.py rename to src/transformers/models/deprecated/efficientformer/__init__.py index 9b36518587..67d046a8b6 100644 --- a/src/transformers/models/efficientformer/__init__.py +++ b/src/transformers/models/deprecated/efficientformer/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import ( +from ....utils import ( OptionalDependencyNotAvailable, _LazyModule, is_tf_available, diff --git a/src/transformers/models/efficientformer/configuration_efficientformer.py b/src/transformers/models/deprecated/efficientformer/configuration_efficientformer.py similarity index 98% rename from src/transformers/models/efficientformer/configuration_efficientformer.py rename to src/transformers/models/deprecated/efficientformer/configuration_efficientformer.py index a9fbfa7e05..fb161d61fc 100644 --- a/src/transformers/models/efficientformer/configuration_efficientformer.py +++ b/src/transformers/models/deprecated/efficientformer/configuration_efficientformer.py @@ -16,8 +16,8 @@ from typing import List -from ...configuration_utils import PretrainedConfig -from ...utils import logging +from ....configuration_utils import PretrainedConfig +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py similarity index 100% rename from src/transformers/models/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py rename to src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py diff --git a/src/transformers/models/efficientformer/image_processing_efficientformer.py b/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py similarity index 98% rename from src/transformers/models/efficientformer/image_processing_efficientformer.py rename to src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py index 38756f7c95..15fdf04051 100644 --- a/src/transformers/models/efficientformer/image_processing_efficientformer.py +++ b/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py @@ -18,13 +18,13 @@ from typing import Dict, List, Optional, Union import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict -from ...image_transforms import ( +from ....image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ....image_transforms import ( 
get_resize_output_image_size, resize, to_channel_dimension_format, ) -from ...image_utils import ( +from ....image_utils import ( IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, ChannelDimension, @@ -38,7 +38,7 @@ from ...image_utils import ( validate_kwargs, validate_preprocess_arguments, ) -from ...utils import TensorType, logging +from ....utils import TensorType, logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/efficientformer/modeling_efficientformer.py b/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py similarity index 99% rename from src/transformers/models/efficientformer/modeling_efficientformer.py rename to src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py index 44d2adbed4..461490c7f5 100644 --- a/src/transformers/models/efficientformer/modeling_efficientformer.py +++ b/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py @@ -23,10 +23,10 @@ import torch.utils.checkpoint from torch import nn from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss -from ...activations import ACT2FN -from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, ImageClassifierOutput -from ...modeling_utils import PreTrainedModel -from ...utils import ( +from ....activations import ACT2FN +from ....modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, ImageClassifierOutput +from ....modeling_utils import PreTrainedModel +from ....utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, diff --git a/src/transformers/models/efficientformer/modeling_tf_efficientformer.py b/src/transformers/models/deprecated/efficientformer/modeling_tf_efficientformer.py similarity index 99% rename from src/transformers/models/efficientformer/modeling_tf_efficientformer.py rename to src/transformers/models/deprecated/efficientformer/modeling_tf_efficientformer.py index a8ce9dd306..d47d06e783 100644 --- a/src/transformers/models/efficientformer/modeling_tf_efficientformer.py +++ b/src/transformers/models/deprecated/efficientformer/modeling_tf_efficientformer.py @@ -20,13 +20,13 @@ from typing import Optional, Tuple, Union import tensorflow as tf -from ...activations_tf import ACT2FN -from ...modeling_tf_outputs import ( +from ....activations_tf import ACT2FN +from ....modeling_tf_outputs import ( TFBaseModelOutput, TFBaseModelOutputWithPooling, TFImageClassifierOutput, ) -from ...modeling_tf_utils import ( +from ....modeling_tf_utils import ( TFPreTrainedModel, TFSequenceClassificationLoss, get_initializer, @@ -34,8 +34,8 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax -from ...utils import ( +from ....tf_utils import shape_list, stable_softmax +from ....utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, diff --git a/src/transformers/models/ernie_m/__init__.py b/src/transformers/models/deprecated/ernie_m/__init__.py similarity index 95% rename from src/transformers/models/ernie_m/__init__.py rename to src/transformers/models/deprecated/ernie_m/__init__.py index fc7076e439..68964d7574 100644 --- a/src/transformers/models/ernie_m/__init__.py +++ b/src/transformers/models/deprecated/ernie_m/__init__.py @@ -14,7 +14,7 @@ from typing import TYPE_CHECKING # rely on isort to merge the imports -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_sentencepiece_available, is_torch_available +from ....utils import 
OptionalDependencyNotAvailable, _LazyModule, is_sentencepiece_available, is_torch_available _import_structure = { diff --git a/src/transformers/models/ernie_m/configuration_ernie_m.py b/src/transformers/models/deprecated/ernie_m/configuration_ernie_m.py similarity index 99% rename from src/transformers/models/ernie_m/configuration_ernie_m.py rename to src/transformers/models/deprecated/ernie_m/configuration_ernie_m.py index cf34a510f2..d5c3feb951 100644 --- a/src/transformers/models/ernie_m/configuration_ernie_m.py +++ b/src/transformers/models/deprecated/ernie_m/configuration_ernie_m.py @@ -19,7 +19,7 @@ from __future__ import annotations from typing import Dict -from ...configuration_utils import PretrainedConfig +from ....configuration_utils import PretrainedConfig class ErnieMConfig(PretrainedConfig): diff --git a/src/transformers/models/ernie_m/modeling_ernie_m.py b/src/transformers/models/deprecated/ernie_m/modeling_ernie_m.py similarity index 99% rename from src/transformers/models/ernie_m/modeling_ernie_m.py rename to src/transformers/models/deprecated/ernie_m/modeling_ernie_m.py index 6b977801fe..d8349ee5aa 100755 --- a/src/transformers/models/ernie_m/modeling_ernie_m.py +++ b/src/transformers/models/deprecated/ernie_m/modeling_ernie_m.py @@ -22,8 +22,8 @@ import torch.utils.checkpoint from torch import nn, tensor from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss -from ...activations import ACT2FN -from ...modeling_outputs import ( +from ....activations import ACT2FN +from ....modeling_outputs import ( BaseModelOutputWithPastAndCrossAttentions, BaseModelOutputWithPoolingAndCrossAttentions, MultipleChoiceModelOutput, @@ -31,9 +31,9 @@ from ...modeling_outputs import ( SequenceClassifierOutput, TokenClassifierOutput, ) -from ...modeling_utils import PreTrainedModel -from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer -from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging +from ....modeling_utils import PreTrainedModel +from ....pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer +from ....utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging from .configuration_ernie_m import ErnieMConfig diff --git a/src/transformers/models/ernie_m/tokenization_ernie_m.py b/src/transformers/models/deprecated/ernie_m/tokenization_ernie_m.py similarity index 99% rename from src/transformers/models/ernie_m/tokenization_ernie_m.py rename to src/transformers/models/deprecated/ernie_m/tokenization_ernie_m.py index 0bd7edea1c..07f9f4ed47 100644 --- a/src/transformers/models/ernie_m/tokenization_ernie_m.py +++ b/src/transformers/models/deprecated/ernie_m/tokenization_ernie_m.py @@ -21,8 +21,8 @@ from typing import Any, Dict, List, Optional, Tuple import sentencepiece as spm -from ...tokenization_utils import PreTrainedTokenizer -from ...utils import logging +from ....tokenization_utils import PreTrainedTokenizer +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/gptsan_japanese/__init__.py b/src/transformers/models/deprecated/gptsan_japanese/__init__.py similarity index 98% rename from src/transformers/models/gptsan_japanese/__init__.py rename to src/transformers/models/deprecated/gptsan_japanese/__init__.py index 9ae8af3466..5bd0f99840 100644 --- a/src/transformers/models/gptsan_japanese/__init__.py +++ b/src/transformers/models/deprecated/gptsan_japanese/__init__.py 
@@ -14,7 +14,7 @@ from typing import TYPE_CHECKING -from ...utils import ( +from ....utils import ( OptionalDependencyNotAvailable, _LazyModule, is_flax_available, diff --git a/src/transformers/models/gptsan_japanese/configuration_gptsan_japanese.py b/src/transformers/models/deprecated/gptsan_japanese/configuration_gptsan_japanese.py similarity index 98% rename from src/transformers/models/gptsan_japanese/configuration_gptsan_japanese.py rename to src/transformers/models/deprecated/gptsan_japanese/configuration_gptsan_japanese.py index 23295f3510..52bd33ac9f 100644 --- a/src/transformers/models/gptsan_japanese/configuration_gptsan_japanese.py +++ b/src/transformers/models/deprecated/gptsan_japanese/configuration_gptsan_japanese.py @@ -14,8 +14,8 @@ # limitations under the License. """GPTSAN-japanese model configuration""" -from ...configuration_utils import PretrainedConfig -from ...utils import logging +from ....configuration_utils import PretrainedConfig +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/gptsan_japanese/convert_gptsan_tf_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/gptsan_japanese/convert_gptsan_tf_checkpoint_to_pytorch.py similarity index 100% rename from src/transformers/models/gptsan_japanese/convert_gptsan_tf_checkpoint_to_pytorch.py rename to src/transformers/models/deprecated/gptsan_japanese/convert_gptsan_tf_checkpoint_to_pytorch.py diff --git a/src/transformers/models/gptsan_japanese/modeling_gptsan_japanese.py b/src/transformers/models/deprecated/gptsan_japanese/modeling_gptsan_japanese.py similarity index 99% rename from src/transformers/models/gptsan_japanese/modeling_gptsan_japanese.py rename to src/transformers/models/deprecated/gptsan_japanese/modeling_gptsan_japanese.py index 7faafd9efb..5129c1091b 100644 --- a/src/transformers/models/gptsan_japanese/modeling_gptsan_japanese.py +++ b/src/transformers/models/deprecated/gptsan_japanese/modeling_gptsan_japanese.py @@ -20,10 +20,10 @@ from typing import List, Optional, Tuple, Union import torch import torch.nn as nn -from ...activations import ACT2FN -from ...modeling_outputs import MoECausalLMOutputWithPast, MoEModelOutputWithPastAndCrossAttentions -from ...modeling_utils import PreTrainedModel -from ...utils import ( +from ....activations import ACT2FN +from ....modeling_outputs import MoECausalLMOutputWithPast, MoEModelOutputWithPastAndCrossAttentions +from ....modeling_utils import PreTrainedModel +from ....utils import ( DUMMY_INPUTS, DUMMY_MASK, add_start_docstrings, diff --git a/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py b/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py similarity index 99% rename from src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py rename to src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py index 56756f3c32..e86aa47c1a 100644 --- a/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py +++ b/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py @@ -22,8 +22,8 @@ from typing import List, Optional, Tuple, Union import numpy as np -from ...tokenization_utils import PreTrainedTokenizer -from ...tokenization_utils_base import ( +from ....tokenization_utils import PreTrainedTokenizer +from ....tokenization_utils_base import ( BatchEncoding, PreTokenizedInput, PreTokenizedInputPair, @@ -31,7 +31,7 @@ from ...tokenization_utils_base import ( TextInputPair, 
TruncationStrategy, ) -from ...utils import PaddingStrategy, logging +from ....utils import PaddingStrategy, logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/graphormer/__init__.py b/src/transformers/models/deprecated/graphormer/__init__.py similarity index 93% rename from src/transformers/models/graphormer/__init__.py rename to src/transformers/models/deprecated/graphormer/__init__.py index f8140c81c1..117bf7c15a 100644 --- a/src/transformers/models/graphormer/__init__.py +++ b/src/transformers/models/deprecated/graphormer/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available +from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available _import_structure = { diff --git a/src/transformers/models/graphormer/algos_graphormer.pyx b/src/transformers/models/deprecated/graphormer/algos_graphormer.pyx similarity index 100% rename from src/transformers/models/graphormer/algos_graphormer.pyx rename to src/transformers/models/deprecated/graphormer/algos_graphormer.pyx diff --git a/src/transformers/models/graphormer/collating_graphormer.py b/src/transformers/models/deprecated/graphormer/collating_graphormer.py similarity index 98% rename from src/transformers/models/graphormer/collating_graphormer.py rename to src/transformers/models/deprecated/graphormer/collating_graphormer.py index 58ce602ea2..1c2342913d 100644 --- a/src/transformers/models/graphormer/collating_graphormer.py +++ b/src/transformers/models/deprecated/graphormer/collating_graphormer.py @@ -6,7 +6,7 @@ from typing import Any, Dict, List, Mapping import numpy as np import torch -from ...utils import is_cython_available, requires_backends +from ....utils import is_cython_available, requires_backends if is_cython_available(): diff --git a/src/transformers/models/graphormer/configuration_graphormer.py b/src/transformers/models/deprecated/graphormer/configuration_graphormer.py similarity index 99% rename from src/transformers/models/graphormer/configuration_graphormer.py rename to src/transformers/models/deprecated/graphormer/configuration_graphormer.py index 9f6904ef38..058ef9d03a 100644 --- a/src/transformers/models/graphormer/configuration_graphormer.py +++ b/src/transformers/models/deprecated/graphormer/configuration_graphormer.py @@ -14,8 +14,8 @@ # limitations under the License. 
"""Graphormer model configuration""" -from ...configuration_utils import PretrainedConfig -from ...utils import logging +from ....configuration_utils import PretrainedConfig +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/graphormer/modeling_graphormer.py b/src/transformers/models/deprecated/graphormer/modeling_graphormer.py similarity index 99% rename from src/transformers/models/graphormer/modeling_graphormer.py rename to src/transformers/models/deprecated/graphormer/modeling_graphormer.py index f2696a586b..0eb4aa7119 100755 --- a/src/transformers/models/graphormer/modeling_graphormer.py +++ b/src/transformers/models/deprecated/graphormer/modeling_graphormer.py @@ -21,13 +21,13 @@ import torch import torch.nn as nn from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss -from ...activations import ACT2FN -from ...modeling_outputs import ( +from ....activations import ACT2FN +from ....modeling_outputs import ( BaseModelOutputWithNoAttention, SequenceClassifierOutput, ) -from ...modeling_utils import PreTrainedModel -from ...utils import logging +from ....modeling_utils import PreTrainedModel +from ....utils import logging from .configuration_graphormer import GraphormerConfig diff --git a/src/transformers/models/jukebox/__init__.py b/src/transformers/models/deprecated/jukebox/__init__.py similarity index 95% rename from src/transformers/models/jukebox/__init__.py rename to src/transformers/models/deprecated/jukebox/__init__.py index 441b11329c..d6de906389 100644 --- a/src/transformers/models/jukebox/__init__.py +++ b/src/transformers/models/deprecated/jukebox/__init__.py @@ -14,7 +14,7 @@ from typing import TYPE_CHECKING -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available +from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available _import_structure = { diff --git a/src/transformers/models/jukebox/configuration_jukebox.py b/src/transformers/models/deprecated/jukebox/configuration_jukebox.py similarity index 99% rename from src/transformers/models/jukebox/configuration_jukebox.py rename to src/transformers/models/deprecated/jukebox/configuration_jukebox.py index a2eee03885..e9d08c478f 100644 --- a/src/transformers/models/jukebox/configuration_jukebox.py +++ b/src/transformers/models/deprecated/jukebox/configuration_jukebox.py @@ -17,8 +17,8 @@ import os from typing import List, Union -from ...configuration_utils import PretrainedConfig -from ...utils import logging +from ....configuration_utils import PretrainedConfig +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/jukebox/convert_jukebox.py b/src/transformers/models/deprecated/jukebox/convert_jukebox.py similarity index 100% rename from src/transformers/models/jukebox/convert_jukebox.py rename to src/transformers/models/deprecated/jukebox/convert_jukebox.py diff --git a/src/transformers/models/jukebox/modeling_jukebox.py b/src/transformers/models/deprecated/jukebox/modeling_jukebox.py similarity index 99% rename from src/transformers/models/jukebox/modeling_jukebox.py rename to src/transformers/models/deprecated/jukebox/modeling_jukebox.py index 9af8dbd684..6688c79e71 100755 --- a/src/transformers/models/jukebox/modeling_jukebox.py +++ b/src/transformers/models/deprecated/jukebox/modeling_jukebox.py @@ -24,10 +24,10 @@ import torch.nn.functional as F from torch import nn from torch.nn import LayerNorm as FusedLayerNorm -from ...activations import ACT2FN 
-from ...modeling_utils import PreTrainedModel -from ...utils import add_start_docstrings, logging -from ...utils.logging import tqdm +from ....activations import ACT2FN +from ....modeling_utils import PreTrainedModel +from ....utils import add_start_docstrings, logging +from ....utils.logging import tqdm from .configuration_jukebox import ATTENTION_PATTERNS, JukeboxConfig, JukeboxPriorConfig, JukeboxVQVAEConfig diff --git a/src/transformers/models/jukebox/tokenization_jukebox.py b/src/transformers/models/deprecated/jukebox/tokenization_jukebox.py similarity index 98% rename from src/transformers/models/jukebox/tokenization_jukebox.py rename to src/transformers/models/deprecated/jukebox/tokenization_jukebox.py index 4952adda64..fb827fbca9 100644 --- a/src/transformers/models/jukebox/tokenization_jukebox.py +++ b/src/transformers/models/deprecated/jukebox/tokenization_jukebox.py @@ -24,10 +24,10 @@ from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np import regex -from ...tokenization_utils import AddedToken, PreTrainedTokenizer -from ...tokenization_utils_base import BatchEncoding -from ...utils import TensorType, is_flax_available, is_tf_available, is_torch_available, logging -from ...utils.generic import _is_jax, _is_numpy +from ....tokenization_utils import AddedToken, PreTrainedTokenizer +from ....tokenization_utils_base import BatchEncoding +from ....utils import TensorType, is_flax_available, is_tf_available, is_torch_available, logging +from ....utils.generic import _is_jax, _is_numpy logger = logging.get_logger(__name__) diff --git a/src/transformers/models/mega/__init__.py b/src/transformers/models/deprecated/mega/__init__.py similarity index 98% rename from src/transformers/models/mega/__init__.py rename to src/transformers/models/deprecated/mega/__init__.py index 3e3b204d8b..1774d3bae4 100644 --- a/src/transformers/models/mega/__init__.py +++ b/src/transformers/models/deprecated/mega/__init__.py @@ -14,7 +14,7 @@ from typing import TYPE_CHECKING -from ...utils import ( +from ....utils import ( OptionalDependencyNotAvailable, _LazyModule, is_torch_available, diff --git a/src/transformers/models/mega/configuration_mega.py b/src/transformers/models/deprecated/mega/configuration_mega.py similarity index 99% rename from src/transformers/models/mega/configuration_mega.py rename to src/transformers/models/deprecated/mega/configuration_mega.py index b090a020af..0b1ab53d5f 100644 --- a/src/transformers/models/mega/configuration_mega.py +++ b/src/transformers/models/deprecated/mega/configuration_mega.py @@ -17,9 +17,9 @@ from collections import OrderedDict from typing import Mapping -from ...configuration_utils import PretrainedConfig -from ...onnx import OnnxConfig -from ...utils import logging +from ....configuration_utils import PretrainedConfig +from ....onnx import OnnxConfig +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py similarity index 100% rename from src/transformers/models/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py rename to src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py diff --git a/src/transformers/models/mega/modeling_mega.py b/src/transformers/models/deprecated/mega/modeling_mega.py similarity index 99% rename from src/transformers/models/mega/modeling_mega.py rename to 
src/transformers/models/deprecated/mega/modeling_mega.py index 65fff1cd49..92d91bdb28 100644 --- a/src/transformers/models/mega/modeling_mega.py +++ b/src/transformers/models/deprecated/mega/modeling_mega.py @@ -23,8 +23,8 @@ import torch.utils.checkpoint from torch import nn from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss -from ...activations import ACT2FN -from ...modeling_outputs import ( +from ....activations import ACT2FN +from ....modeling_outputs import ( BaseModelOutputWithPoolingAndCrossAttentions, CausalLMOutputWithCrossAttentions, MaskedLMOutput, @@ -33,9 +33,9 @@ from ...modeling_outputs import ( SequenceClassifierOutput, TokenClassifierOutput, ) -from ...modeling_utils import PreTrainedModel -from ...pytorch_utils import ALL_LAYERNORM_LAYERS -from ...utils import ( +from ....modeling_utils import PreTrainedModel +from ....pytorch_utils import ALL_LAYERNORM_LAYERS +from ....utils import ( add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, diff --git a/src/transformers/models/nat/__init__.py b/src/transformers/models/deprecated/nat/__init__.py similarity index 94% rename from src/transformers/models/nat/__init__.py rename to src/transformers/models/deprecated/nat/__init__.py index bcf05ddf41..70d2cfd295 100644 --- a/src/transformers/models/nat/__init__.py +++ b/src/transformers/models/deprecated/nat/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available +from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available _import_structure = {"configuration_nat": ["NatConfig"]} diff --git a/src/transformers/models/nat/configuration_nat.py b/src/transformers/models/deprecated/nat/configuration_nat.py similarity index 97% rename from src/transformers/models/nat/configuration_nat.py rename to src/transformers/models/deprecated/nat/configuration_nat.py index b20a60ac1e..2fef74d2a0 100644 --- a/src/transformers/models/nat/configuration_nat.py +++ b/src/transformers/models/deprecated/nat/configuration_nat.py @@ -14,9 +14,9 @@ # limitations under the License. 
"""Neighborhood Attention Transformer model configuration""" -from ...configuration_utils import PretrainedConfig -from ...utils import logging -from ...utils.backbone_utils import BackboneConfigMixin, get_aligned_output_features_output_indices +from ....configuration_utils import PretrainedConfig +from ....utils import logging +from ....utils.backbone_utils import BackboneConfigMixin, get_aligned_output_features_output_indices logger = logging.get_logger(__name__) diff --git a/src/transformers/models/nat/modeling_nat.py b/src/transformers/models/deprecated/nat/modeling_nat.py similarity index 99% rename from src/transformers/models/nat/modeling_nat.py rename to src/transformers/models/deprecated/nat/modeling_nat.py index fa51801009..58d92ada0b 100644 --- a/src/transformers/models/nat/modeling_nat.py +++ b/src/transformers/models/deprecated/nat/modeling_nat.py @@ -23,11 +23,11 @@ import torch.utils.checkpoint from torch import nn from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss -from ...activations import ACT2FN -from ...modeling_outputs import BackboneOutput -from ...modeling_utils import PreTrainedModel -from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer -from ...utils import ( +from ....activations import ACT2FN +from ....modeling_outputs import BackboneOutput +from ....modeling_utils import PreTrainedModel +from ....pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer +from ....utils import ( ModelOutput, OptionalDependencyNotAvailable, add_code_sample_docstrings, @@ -38,7 +38,7 @@ from ...utils import ( replace_return_docstrings, requires_backends, ) -from ...utils.backbone_utils import BackboneMixin +from ....utils.backbone_utils import BackboneMixin from .configuration_nat import NatConfig diff --git a/src/transformers/models/nezha/__init__.py b/src/transformers/models/deprecated/nezha/__init__.py similarity index 94% rename from src/transformers/models/nezha/__init__.py rename to src/transformers/models/deprecated/nezha/__init__.py index 5149adf3a0..590b0013c5 100644 --- a/src/transformers/models/nezha/__init__.py +++ b/src/transformers/models/deprecated/nezha/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available +from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available _import_structure = { diff --git a/src/transformers/models/nezha/configuration_nezha.py b/src/transformers/models/deprecated/nezha/configuration_nezha.py similarity index 99% rename from src/transformers/models/nezha/configuration_nezha.py rename to src/transformers/models/deprecated/nezha/configuration_nezha.py index 4e145e4b68..c60bb5de51 100644 --- a/src/transformers/models/nezha/configuration_nezha.py +++ b/src/transformers/models/deprecated/nezha/configuration_nezha.py @@ -1,4 +1,4 @@ -from ... import PretrainedConfig +from .... 
import PretrainedConfig class NezhaConfig(PretrainedConfig): diff --git a/src/transformers/models/nezha/modeling_nezha.py b/src/transformers/models/deprecated/nezha/modeling_nezha.py similarity index 99% rename from src/transformers/models/nezha/modeling_nezha.py rename to src/transformers/models/deprecated/nezha/modeling_nezha.py index 30c8b6d890..ef20396c00 100644 --- a/src/transformers/models/nezha/modeling_nezha.py +++ b/src/transformers/models/deprecated/nezha/modeling_nezha.py @@ -25,8 +25,8 @@ import torch.utils.checkpoint from torch import nn from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss -from ...activations import ACT2FN -from ...modeling_outputs import ( +from ....activations import ACT2FN +from ....modeling_outputs import ( BaseModelOutputWithPastAndCrossAttentions, BaseModelOutputWithPoolingAndCrossAttentions, MaskedLMOutput, @@ -36,9 +36,9 @@ from ...modeling_outputs import ( SequenceClassifierOutput, TokenClassifierOutput, ) -from ...modeling_utils import PreTrainedModel -from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer -from ...utils import ( +from ....modeling_utils import PreTrainedModel +from ....pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer +from ....utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, diff --git a/src/transformers/models/qdqbert/__init__.py b/src/transformers/models/deprecated/qdqbert/__init__.py similarity index 96% rename from src/transformers/models/qdqbert/__init__.py rename to src/transformers/models/deprecated/qdqbert/__init__.py index d413aefe0c..06e69cdc1f 100644 --- a/src/transformers/models/qdqbert/__init__.py +++ b/src/transformers/models/deprecated/qdqbert/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available +from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available _import_structure = {"configuration_qdqbert": ["QDQBertConfig"]} diff --git a/src/transformers/models/qdqbert/configuration_qdqbert.py b/src/transformers/models/deprecated/qdqbert/configuration_qdqbert.py similarity index 98% rename from src/transformers/models/qdqbert/configuration_qdqbert.py rename to src/transformers/models/deprecated/qdqbert/configuration_qdqbert.py index 9f1fdfe31d..b2ba629b24 100644 --- a/src/transformers/models/qdqbert/configuration_qdqbert.py +++ b/src/transformers/models/deprecated/qdqbert/configuration_qdqbert.py @@ -14,8 +14,8 @@ # limitations under the License. 
"""QDQBERT model configuration""" -from ...configuration_utils import PretrainedConfig -from ...utils import logging +from ....configuration_utils import PretrainedConfig +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/qdqbert/modeling_qdqbert.py b/src/transformers/models/deprecated/qdqbert/modeling_qdqbert.py similarity index 99% rename from src/transformers/models/qdqbert/modeling_qdqbert.py rename to src/transformers/models/deprecated/qdqbert/modeling_qdqbert.py index 6078061873..f58c9b7fd6 100755 --- a/src/transformers/models/qdqbert/modeling_qdqbert.py +++ b/src/transformers/models/deprecated/qdqbert/modeling_qdqbert.py @@ -25,8 +25,8 @@ import torch.utils.checkpoint from torch import nn from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss -from ...activations import ACT2FN -from ...modeling_outputs import ( +from ....activations import ACT2FN +from ....modeling_outputs import ( BaseModelOutputWithPastAndCrossAttentions, BaseModelOutputWithPoolingAndCrossAttentions, CausalLMOutputWithCrossAttentions, @@ -37,9 +37,9 @@ from ...modeling_outputs import ( SequenceClassifierOutput, TokenClassifierOutput, ) -from ...modeling_utils import PreTrainedModel -from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer -from ...utils import ( +from ....modeling_utils import PreTrainedModel +from ....pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer +from ....utils import ( add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, diff --git a/src/transformers/models/realm/__init__.py b/src/transformers/models/deprecated/realm/__init__.py similarity index 95% rename from src/transformers/models/realm/__init__.py rename to src/transformers/models/deprecated/realm/__init__.py index eea7384673..85fe72441f 100644 --- a/src/transformers/models/realm/__init__.py +++ b/src/transformers/models/deprecated/realm/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available +from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available _import_structure = { diff --git a/src/transformers/models/realm/configuration_realm.py b/src/transformers/models/deprecated/realm/configuration_realm.py similarity index 98% rename from src/transformers/models/realm/configuration_realm.py rename to src/transformers/models/deprecated/realm/configuration_realm.py index 7e84f2916d..20fd201d98 100644 --- a/src/transformers/models/realm/configuration_realm.py +++ b/src/transformers/models/deprecated/realm/configuration_realm.py @@ -14,8 +14,8 @@ # limitations under the License. 
"""REALM model configuration.""" -from ...configuration_utils import PretrainedConfig -from ...utils import logging +from ....configuration_utils import PretrainedConfig +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/realm/modeling_realm.py b/src/transformers/models/deprecated/realm/modeling_realm.py similarity index 99% rename from src/transformers/models/realm/modeling_realm.py rename to src/transformers/models/deprecated/realm/modeling_realm.py index 7c5c344ae5..f41eafe184 100644 --- a/src/transformers/models/realm/modeling_realm.py +++ b/src/transformers/models/deprecated/realm/modeling_realm.py @@ -23,16 +23,16 @@ import torch from torch import nn from torch.nn import CrossEntropyLoss -from ...activations import ACT2FN -from ...modeling_outputs import ( +from ....activations import ACT2FN +from ....modeling_outputs import ( BaseModelOutputWithPastAndCrossAttentions, BaseModelOutputWithPoolingAndCrossAttentions, MaskedLMOutput, ModelOutput, ) -from ...modeling_utils import PreTrainedModel -from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer -from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings +from ....modeling_utils import PreTrainedModel +from ....pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer +from ....utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings from .configuration_realm import RealmConfig diff --git a/src/transformers/models/realm/retrieval_realm.py b/src/transformers/models/deprecated/realm/retrieval_realm.py similarity index 99% rename from src/transformers/models/realm/retrieval_realm.py rename to src/transformers/models/deprecated/realm/retrieval_realm.py index c84e7af08f..4bfa2106c6 100644 --- a/src/transformers/models/realm/retrieval_realm.py +++ b/src/transformers/models/deprecated/realm/retrieval_realm.py @@ -20,8 +20,8 @@ from typing import Optional, Union import numpy as np from huggingface_hub import hf_hub_download -from ... import AutoTokenizer -from ...utils import logging +from .... 
import AutoTokenizer +from ....utils import logging _REALM_BLOCK_RECORDS_FILENAME = "block_records.npy" diff --git a/src/transformers/models/realm/tokenization_realm.py b/src/transformers/models/deprecated/realm/tokenization_realm.py similarity index 99% rename from src/transformers/models/realm/tokenization_realm.py rename to src/transformers/models/deprecated/realm/tokenization_realm.py index c4ff7e38a3..671405301d 100644 --- a/src/transformers/models/realm/tokenization_realm.py +++ b/src/transformers/models/deprecated/realm/tokenization_realm.py @@ -19,9 +19,9 @@ import os import unicodedata from typing import List, Optional, Tuple -from ...tokenization_utils import PreTrainedTokenizer, _is_control, _is_punctuation, _is_whitespace -from ...tokenization_utils_base import BatchEncoding -from ...utils import PaddingStrategy, logging +from ....tokenization_utils import PreTrainedTokenizer, _is_control, _is_punctuation, _is_whitespace +from ....tokenization_utils_base import BatchEncoding +from ....utils import PaddingStrategy, logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/realm/tokenization_realm_fast.py b/src/transformers/models/deprecated/realm/tokenization_realm_fast.py similarity index 98% rename from src/transformers/models/realm/tokenization_realm_fast.py rename to src/transformers/models/deprecated/realm/tokenization_realm_fast.py index 7315bf1c25..cbc4869e54 100644 --- a/src/transformers/models/realm/tokenization_realm_fast.py +++ b/src/transformers/models/deprecated/realm/tokenization_realm_fast.py @@ -19,9 +19,9 @@ from typing import List, Optional, Tuple from tokenizers import normalizers -from ...tokenization_utils_base import BatchEncoding -from ...tokenization_utils_fast import PreTrainedTokenizerFast -from ...utils import PaddingStrategy, logging +from ....tokenization_utils_base import BatchEncoding +from ....tokenization_utils_fast import PreTrainedTokenizerFast +from ....utils import PaddingStrategy, logging from .tokenization_realm import RealmTokenizer diff --git a/src/transformers/models/speech_to_text_2/__init__.py b/src/transformers/models/deprecated/speech_to_text_2/__init__.py similarity index 98% rename from src/transformers/models/speech_to_text_2/__init__.py rename to src/transformers/models/deprecated/speech_to_text_2/__init__.py index ab507bc19f..53f806d00c 100644 --- a/src/transformers/models/speech_to_text_2/__init__.py +++ b/src/transformers/models/deprecated/speech_to_text_2/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import ( +from ....utils import ( OptionalDependencyNotAvailable, _LazyModule, is_sentencepiece_available, diff --git a/src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py b/src/transformers/models/deprecated/speech_to_text_2/configuration_speech_to_text_2.py similarity index 98% rename from src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py rename to src/transformers/models/deprecated/speech_to_text_2/configuration_speech_to_text_2.py index bcc92a7bd2..d876c4fc3e 100644 --- a/src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py +++ b/src/transformers/models/deprecated/speech_to_text_2/configuration_speech_to_text_2.py @@ -14,8 +14,8 @@ # limitations under the License. 
"""Speech2Text model configuration""" -from ...configuration_utils import PretrainedConfig -from ...utils import logging +from ....configuration_utils import PretrainedConfig +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py b/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py similarity index 99% rename from src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py rename to src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py index 35305408e6..6953821648 100755 --- a/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py +++ b/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py @@ -22,11 +22,11 @@ import torch from torch import nn from torch.nn import CrossEntropyLoss -from ...activations import ACT2FN -from ...modeling_attn_mask_utils import _prepare_4d_attention_mask, _prepare_4d_causal_attention_mask -from ...modeling_outputs import BaseModelOutputWithPastAndCrossAttentions, CausalLMOutputWithCrossAttentions -from ...modeling_utils import PreTrainedModel -from ...utils import add_start_docstrings, logging, replace_return_docstrings +from ....activations import ACT2FN +from ....modeling_attn_mask_utils import _prepare_4d_attention_mask, _prepare_4d_causal_attention_mask +from ....modeling_outputs import BaseModelOutputWithPastAndCrossAttentions, CausalLMOutputWithCrossAttentions +from ....modeling_utils import PreTrainedModel +from ....utils import add_start_docstrings, logging, replace_return_docstrings from .configuration_speech_to_text_2 import Speech2Text2Config diff --git a/src/transformers/models/speech_to_text_2/processing_speech_to_text_2.py b/src/transformers/models/deprecated/speech_to_text_2/processing_speech_to_text_2.py similarity index 98% rename from src/transformers/models/speech_to_text_2/processing_speech_to_text_2.py rename to src/transformers/models/deprecated/speech_to_text_2/processing_speech_to_text_2.py index 9e0881d89d..ce8527e4a7 100644 --- a/src/transformers/models/speech_to_text_2/processing_speech_to_text_2.py +++ b/src/transformers/models/deprecated/speech_to_text_2/processing_speech_to_text_2.py @@ -19,7 +19,7 @@ Speech processor class for Speech2Text2 import warnings from contextlib import contextmanager -from ...processing_utils import ProcessorMixin +from ....processing_utils import ProcessorMixin class Speech2Text2Processor(ProcessorMixin): diff --git a/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py b/src/transformers/models/deprecated/speech_to_text_2/tokenization_speech_to_text_2.py similarity index 98% rename from src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py rename to src/transformers/models/deprecated/speech_to_text_2/tokenization_speech_to_text_2.py index 8d6818356f..2eefe44915 100644 --- a/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py +++ b/src/transformers/models/deprecated/speech_to_text_2/tokenization_speech_to_text_2.py @@ -18,8 +18,8 @@ import json import os from typing import Dict, List, Optional, Tuple -from ...tokenization_utils import PreTrainedTokenizer -from ...utils import logging +from ....tokenization_utils import PreTrainedTokenizer +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/tvlt/__init__.py b/src/transformers/models/deprecated/tvlt/__init__.py similarity index 99% rename 
from src/transformers/models/tvlt/__init__.py rename to src/transformers/models/deprecated/tvlt/__init__.py index d63bad0a7a..0a2f1e3934 100644 --- a/src/transformers/models/tvlt/__init__.py +++ b/src/transformers/models/deprecated/tvlt/__init__.py @@ -17,7 +17,7 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import ( +from ....utils import ( OptionalDependencyNotAvailable, _LazyModule, is_torch_available, diff --git a/src/transformers/models/tvlt/configuration_tvlt.py b/src/transformers/models/deprecated/tvlt/configuration_tvlt.py similarity index 99% rename from src/transformers/models/tvlt/configuration_tvlt.py rename to src/transformers/models/deprecated/tvlt/configuration_tvlt.py index 1a1782f68c..bc9c133bec 100644 --- a/src/transformers/models/tvlt/configuration_tvlt.py +++ b/src/transformers/models/deprecated/tvlt/configuration_tvlt.py @@ -14,8 +14,8 @@ # limitations under the License. """TVLT model configuration""" -from ...configuration_utils import PretrainedConfig -from ...utils import logging +from ....configuration_utils import PretrainedConfig +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/tvlt/feature_extraction_tvlt.py b/src/transformers/models/deprecated/tvlt/feature_extraction_tvlt.py similarity index 98% rename from src/transformers/models/tvlt/feature_extraction_tvlt.py rename to src/transformers/models/deprecated/tvlt/feature_extraction_tvlt.py index 7dc5e04631..2d41af33e5 100644 --- a/src/transformers/models/tvlt/feature_extraction_tvlt.py +++ b/src/transformers/models/deprecated/tvlt/feature_extraction_tvlt.py @@ -19,9 +19,9 @@ from typing import List, Optional, Union import numpy as np -from ...audio_utils import mel_filter_bank, spectrogram, window_function -from ...feature_extraction_sequence_utils import BatchFeature, SequenceFeatureExtractor -from ...utils import TensorType, logging +from ....audio_utils import mel_filter_bank, spectrogram, window_function +from ....feature_extraction_sequence_utils import BatchFeature, SequenceFeatureExtractor +from ....utils import TensorType, logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/tvlt/image_processing_tvlt.py b/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py similarity index 99% rename from src/transformers/models/tvlt/image_processing_tvlt.py rename to src/transformers/models/deprecated/tvlt/image_processing_tvlt.py index 06576a0f7e..009f8307d4 100644 --- a/src/transformers/models/tvlt/image_processing_tvlt.py +++ b/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py @@ -18,13 +18,13 @@ from typing import Dict, List, Optional, Union import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict -from ...image_transforms import ( +from ....image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ....image_transforms import ( get_resize_output_image_size, resize, to_channel_dimension_format, ) -from ...image_utils import ( +from ....image_utils import ( IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, ChannelDimension, @@ -38,7 +38,7 @@ from ...image_utils import ( validate_kwargs, validate_preprocess_arguments, ) -from ...utils import TensorType, logging +from ....utils import TensorType, logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/tvlt/modeling_tvlt.py b/src/transformers/models/deprecated/tvlt/modeling_tvlt.py similarity index 99% rename from 
src/transformers/models/tvlt/modeling_tvlt.py rename to src/transformers/models/deprecated/tvlt/modeling_tvlt.py index d49fef5822..ae84a7df19 100644 --- a/src/transformers/models/tvlt/modeling_tvlt.py +++ b/src/transformers/models/deprecated/tvlt/modeling_tvlt.py @@ -25,11 +25,11 @@ import torch.utils.checkpoint from torch import nn from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss -from ...activations import ACT2FN -from ...modeling_outputs import BaseModelOutput, SequenceClassifierOutput -from ...modeling_utils import PreTrainedModel -from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer -from ...utils import ( +from ....activations import ACT2FN +from ....modeling_outputs import BaseModelOutput, SequenceClassifierOutput +from ....modeling_utils import PreTrainedModel +from ....pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer +from ....utils import ( ModelOutput, add_start_docstrings, add_start_docstrings_to_model_forward, diff --git a/src/transformers/models/tvlt/processing_tvlt.py b/src/transformers/models/deprecated/tvlt/processing_tvlt.py similarity index 98% rename from src/transformers/models/tvlt/processing_tvlt.py rename to src/transformers/models/deprecated/tvlt/processing_tvlt.py index c67a3a8c6d..da9c755b55 100644 --- a/src/transformers/models/tvlt/processing_tvlt.py +++ b/src/transformers/models/deprecated/tvlt/processing_tvlt.py @@ -16,7 +16,7 @@ Processor class for TVLT. """ -from ...processing_utils import ProcessorMixin +from ....processing_utils import ProcessorMixin class TvltProcessor(ProcessorMixin): diff --git a/src/transformers/models/vit_hybrid/__init__.py b/src/transformers/models/deprecated/vit_hybrid/__init__.py similarity index 94% rename from src/transformers/models/vit_hybrid/__init__.py rename to src/transformers/models/deprecated/vit_hybrid/__init__.py index f87e44449a..d0f9c5831d 100644 --- a/src/transformers/models/vit_hybrid/__init__.py +++ b/src/transformers/models/deprecated/vit_hybrid/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available +from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available _import_structure = {"configuration_vit_hybrid": ["ViTHybridConfig"]} diff --git a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py b/src/transformers/models/deprecated/vit_hybrid/configuration_vit_hybrid.py similarity index 97% rename from src/transformers/models/vit_hybrid/configuration_vit_hybrid.py rename to src/transformers/models/deprecated/vit_hybrid/configuration_vit_hybrid.py index 78349af336..c0e4244a5a 100644 --- a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py +++ b/src/transformers/models/deprecated/vit_hybrid/configuration_vit_hybrid.py @@ -14,10 +14,10 @@ # limitations under the License. 
"""ViT Hybrid model configuration""" -from ...configuration_utils import PretrainedConfig -from ...utils import logging -from ..auto.configuration_auto import CONFIG_MAPPING -from ..bit import BitConfig +from ....configuration_utils import PretrainedConfig +from ....utils import logging +from ...auto.configuration_auto import CONFIG_MAPPING +from ...bit import BitConfig logger = logging.get_logger(__name__) diff --git a/src/transformers/models/vit_hybrid/convert_vit_hybrid_timm_to_pytorch.py b/src/transformers/models/deprecated/vit_hybrid/convert_vit_hybrid_timm_to_pytorch.py similarity index 100% rename from src/transformers/models/vit_hybrid/convert_vit_hybrid_timm_to_pytorch.py rename to src/transformers/models/deprecated/vit_hybrid/convert_vit_hybrid_timm_to_pytorch.py diff --git a/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py similarity index 98% rename from src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py rename to src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py index 4bb3f70b49..b8db4a7fae 100644 --- a/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py +++ b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py @@ -18,14 +18,14 @@ from typing import Dict, List, Optional, Union import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict -from ...image_transforms import ( +from ....image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ....image_transforms import ( convert_to_rgb, get_resize_output_image_size, resize, to_channel_dimension_format, ) -from ...image_utils import ( +from ....image_utils import ( OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, ChannelDimension, @@ -39,7 +39,7 @@ from ...image_utils import ( validate_kwargs, validate_preprocess_arguments, ) -from ...utils import TensorType, is_vision_available, logging +from ....utils import TensorType, is_vision_available, logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/vit_hybrid/modeling_vit_hybrid.py b/src/transformers/models/deprecated/vit_hybrid/modeling_vit_hybrid.py similarity index 98% rename from src/transformers/models/vit_hybrid/modeling_vit_hybrid.py rename to src/transformers/models/deprecated/vit_hybrid/modeling_vit_hybrid.py index 9c92e592c1..9c025d3615 100644 --- a/src/transformers/models/vit_hybrid/modeling_vit_hybrid.py +++ b/src/transformers/models/deprecated/vit_hybrid/modeling_vit_hybrid.py @@ -23,12 +23,12 @@ import torch.utils.checkpoint from torch import nn from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss -from ...activations import ACT2FN -from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, ImageClassifierOutput -from ...modeling_utils import PreTrainedModel -from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer -from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging -from ...utils.backbone_utils import load_backbone +from ....activations import ACT2FN +from ....modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, ImageClassifierOutput +from ....modeling_utils import PreTrainedModel +from ....pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer +from ....utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, 
logging +from ....utils.backbone_utils import load_backbone from .configuration_vit_hybrid import ViTHybridConfig diff --git a/src/transformers/models/xlm_prophetnet/__init__.py b/src/transformers/models/deprecated/xlm_prophetnet/__init__.py similarity index 95% rename from src/transformers/models/xlm_prophetnet/__init__.py rename to src/transformers/models/deprecated/xlm_prophetnet/__init__.py index d9c24d9b4d..850d2958cb 100644 --- a/src/transformers/models/xlm_prophetnet/__init__.py +++ b/src/transformers/models/deprecated/xlm_prophetnet/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_sentencepiece_available, is_torch_available +from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_sentencepiece_available, is_torch_available _import_structure = { diff --git a/src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py b/src/transformers/models/deprecated/xlm_prophetnet/configuration_xlm_prophetnet.py similarity index 99% rename from src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py rename to src/transformers/models/deprecated/xlm_prophetnet/configuration_xlm_prophetnet.py index 94d38242b6..5d3f63670f 100644 --- a/src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py +++ b/src/transformers/models/deprecated/xlm_prophetnet/configuration_xlm_prophetnet.py @@ -16,8 +16,8 @@ from typing import Callable, Optional, Union -from ...configuration_utils import PretrainedConfig -from ...utils import logging +from ....configuration_utils import PretrainedConfig +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py b/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py similarity index 99% rename from src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py rename to src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py index 669c21026e..68fb70d4f1 100644 --- a/src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py +++ b/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py @@ -25,10 +25,10 @@ import torch.utils.checkpoint from torch import Tensor, nn from torch.nn import LayerNorm -from ...activations import ACT2FN -from ...modeling_outputs import BaseModelOutput -from ...modeling_utils import PreTrainedModel -from ...utils import ( +from ....activations import ACT2FN +from ....modeling_outputs import BaseModelOutput +from ....modeling_utils import PreTrainedModel +from ....utils import ( ModelOutput, add_start_docstrings, add_start_docstrings_to_model_forward, diff --git a/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py b/src/transformers/models/deprecated/xlm_prophetnet/tokenization_xlm_prophetnet.py similarity index 99% rename from src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py rename to src/transformers/models/deprecated/xlm_prophetnet/tokenization_xlm_prophetnet.py index fa65fa5cbf..87f4580019 100644 --- a/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py +++ b/src/transformers/models/deprecated/xlm_prophetnet/tokenization_xlm_prophetnet.py @@ -18,8 +18,8 @@ import os from shutil import copyfile from typing import Any, Dict, List, Optional, Tuple -from ...tokenization_utils import PreTrainedTokenizer -from ...utils import logging +from ....tokenization_utils import 
PreTrainedTokenizer +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/dinat/modeling_dinat.py b/src/transformers/models/dinat/modeling_dinat.py index 95d6a60183..18f8725da8 100644 --- a/src/transformers/models/dinat/modeling_dinat.py +++ b/src/transformers/models/dinat/modeling_dinat.py @@ -71,7 +71,6 @@ _IMAGE_CLASS_EXPECTED_OUTPUT = "tabby, tabby cat" @dataclass -# Copied from transformers.models.nat.modeling_nat.NatEncoderOutput with Nat->Dinat class DinatEncoderOutput(ModelOutput): """ Dinat encoder's outputs, with potential hidden states and attentions. @@ -105,7 +104,6 @@ class DinatEncoderOutput(ModelOutput): @dataclass -# Copied from transformers.models.nat.modeling_nat.NatModelOutput with Nat->Dinat class DinatModelOutput(ModelOutput): """ Dinat model's outputs that also contains a pooling of the last hidden states. @@ -142,7 +140,6 @@ class DinatModelOutput(ModelOutput): @dataclass -# Copied from transformers.models.nat.modeling_nat.NatImageClassifierOutput with Nat->Dinat class DinatImageClassifierOutput(ModelOutput): """ Dinat outputs for image classification. @@ -178,7 +175,6 @@ class DinatImageClassifierOutput(ModelOutput): reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None -# Copied from transformers.models.nat.modeling_nat.NatEmbeddings with Nat->Dinat class DinatEmbeddings(nn.Module): """ Construct the patch and position embeddings. @@ -201,7 +197,6 @@ class DinatEmbeddings(nn.Module): return embeddings -# Copied from transformers.models.nat.modeling_nat.NatPatchEmbeddings with Nat->Dinat class DinatPatchEmbeddings(nn.Module): """ This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial @@ -238,7 +233,6 @@ class DinatPatchEmbeddings(nn.Module): return embeddings -# Copied from transformers.models.nat.modeling_nat.NatDownsampler with Nat->Dinat class DinatDownsampler(nn.Module): """ Convolutional Downsampling Layer. 
@@ -321,7 +315,6 @@ class NeighborhoodAttention(nn.Module): self.dropout = nn.Dropout(config.attention_probs_dropout_prob) - # Copied from transformers.models.nat.modeling_nat.NeighborhoodAttention.transpose_for_scores with Nat->Dinat def transpose_for_scores(self, x): new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) x = x.view(new_x_shape) @@ -361,7 +354,6 @@ class NeighborhoodAttention(nn.Module): return outputs -# Copied from transformers.models.nat.modeling_nat.NeighborhoodAttentionOutput class NeighborhoodAttentionOutput(nn.Module): def __init__(self, config, dim): super().__init__() @@ -382,7 +374,6 @@ class NeighborhoodAttentionModule(nn.Module): self.output = NeighborhoodAttentionOutput(config, dim) self.pruned_heads = set() - # Copied from transformers.models.nat.modeling_nat.NeighborhoodAttentionModule.prune_heads def prune_heads(self, heads): if len(heads) == 0: return @@ -401,7 +392,6 @@ class NeighborhoodAttentionModule(nn.Module): self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads self.pruned_heads = self.pruned_heads.union(heads) - # Copied from transformers.models.nat.modeling_nat.NeighborhoodAttentionModule.forward def forward( self, hidden_states: torch.Tensor, @@ -413,7 +403,6 @@ class NeighborhoodAttentionModule(nn.Module): return outputs -# Copied from transformers.models.nat.modeling_nat.NatIntermediate with Nat->Dinat class DinatIntermediate(nn.Module): def __init__(self, config, dim): super().__init__() @@ -429,7 +418,6 @@ class DinatIntermediate(nn.Module): return hidden_states -# Copied from transformers.models.nat.modeling_nat.NatOutput with Nat->Dinat class DinatOutput(nn.Module): def __init__(self, config, dim): super().__init__() @@ -539,7 +527,6 @@ class DinatStage(nn.Module): self.pointing = False - # Copied from transformers.models.nat.modeling_nat.NatStage.forward def forward( self, hidden_states: torch.Tensor, @@ -582,7 +569,6 @@ class DinatEncoder(nn.Module): ] ) - # Copied from transformers.models.nat.modeling_nat.NatEncoder.forward with Nat->Dinat def forward( self, hidden_states: torch.Tensor, @@ -687,7 +673,6 @@ DINAT_INPUTS_DOCSTRING = r""" "The bare Dinat Model transformer outputting raw hidden-states without any specific head on top.", DINAT_START_DOCSTRING, ) -# Copied from transformers.models.nat.modeling_nat.NatModel with Nat->Dinat, NAT->DINAT class DinatModel(DinatPreTrainedModel): def __init__(self, config, add_pooling_layer=True): super().__init__(config) diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index 5ac2a2ccbd..0cda4ed7b9 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -2609,6 +2609,174 @@ class DeiTPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) +class DetaForObjectDetection(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class DetaModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class DetaPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class EfficientFormerForImageClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class 
EfficientFormerForImageClassificationWithTeacher(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class EfficientFormerModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class EfficientFormerPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class ErnieMForInformationExtraction(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class ErnieMForMultipleChoice(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class ErnieMForQuestionAnswering(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class ErnieMForSequenceClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class ErnieMForTokenClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class ErnieMModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class ErnieMPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class GPTSanJapaneseForConditionalGeneration(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class GPTSanJapaneseModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class GPTSanJapanesePreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class GraphormerForGraphClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class GraphormerModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class GraphormerPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class JukeboxModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class JukeboxPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class JukeboxPrior(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class JukeboxVQVAE(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class MCTCTForCTC(metaclass=DummyObject): _backends = ["torch"] @@ -2630,6 +2798,62 @@ class MCTCTPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) +class MegaForCausalLM(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, 
["torch"]) + + +class MegaForMaskedLM(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class MegaForMultipleChoice(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class MegaForQuestionAnswering(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class MegaForSequenceClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class MegaForTokenClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class MegaModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class MegaPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class MMBTForClassification(metaclass=DummyObject): _backends = ["torch"] @@ -2651,6 +2875,97 @@ class ModalEmbeddings(metaclass=DummyObject): requires_backends(self, ["torch"]) +class NatBackbone(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class NatForImageClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class NatModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class NatPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class NezhaForMaskedLM(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class NezhaForMultipleChoice(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class NezhaForNextSentencePrediction(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class NezhaForPreTraining(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class NezhaForQuestionAnswering(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class NezhaForSequenceClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class NezhaForTokenClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class NezhaModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class NezhaPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class OpenLlamaForCausalLM(metaclass=DummyObject): _backends = ["torch"] @@ -2679,6 +2994,133 @@ class OpenLlamaPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) +class 
QDQBertForMaskedLM(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class QDQBertForMultipleChoice(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class QDQBertForNextSentencePrediction(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class QDQBertForQuestionAnswering(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class QDQBertForSequenceClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class QDQBertForTokenClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class QDQBertLayer(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class QDQBertLMHeadModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class QDQBertModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class QDQBertPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +def load_tf_weights_in_qdqbert(*args, **kwargs): + requires_backends(load_tf_weights_in_qdqbert, ["torch"]) + + +class RealmEmbedder(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class RealmForOpenQA(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class RealmKnowledgeAugEncoder(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class RealmPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class RealmReader(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class RealmRetriever(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class RealmScorer(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +def load_tf_weights_in_realm(*args, **kwargs): + requires_backends(load_tf_weights_in_realm, ["torch"]) + + class RetriBertModel(metaclass=DummyObject): _backends = ["torch"] @@ -2693,6 +3135,20 @@ class RetriBertPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) +class Speech2Text2ForCausalLM(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class Speech2Text2PreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class TrajectoryTransformerModel(metaclass=DummyObject): _backends = ["torch"] @@ -2746,6 +3202,34 @@ def load_tf_weights_in_transfo_xl(*args, **kwargs): 
requires_backends(load_tf_weights_in_transfo_xl, ["torch"]) +class TvltForAudioVisualClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class TvltForPreTraining(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class TvltModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class TvltPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class VanForImageClassification(metaclass=DummyObject): _backends = ["torch"] @@ -2767,6 +3251,69 @@ class VanPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) +class ViTHybridForImageClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class ViTHybridModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class ViTHybridPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class XLMProphetNetDecoder(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class XLMProphetNetEncoder(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class XLMProphetNetForCausalLM(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class XLMProphetNetForConditionalGeneration(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class XLMProphetNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class XLMProphetNetPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class DepthAnythingForDepthEstimation(metaclass=DummyObject): _backends = ["torch"] @@ -2781,27 +3328,6 @@ class DepthAnythingPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) -class DetaForObjectDetection(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class DetaModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class DetaPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class DetrForObjectDetection(metaclass=DummyObject): _backends = ["torch"] @@ -3026,34 +3552,6 @@ class DPTPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) -class EfficientFormerForImageClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class EfficientFormerForImageClassificationWithTeacher(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class 
EfficientFormerModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class EfficientFormerPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class EfficientNetForImageClassification(metaclass=DummyObject): _backends = ["torch"] @@ -3233,55 +3731,6 @@ class ErniePreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) -class ErnieMForInformationExtraction(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class ErnieMForMultipleChoice(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class ErnieMForQuestionAnswering(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class ErnieMForSequenceClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class ErnieMForTokenClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class ErnieMModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class ErnieMPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class EsmFoldPreTrainedModel(metaclass=DummyObject): _backends = ["torch"] @@ -4043,48 +4492,6 @@ class GPTJPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) -class GPTSanJapaneseForConditionalGeneration(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class GPTSanJapaneseModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class GPTSanJapanesePreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class GraphormerForGraphClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class GraphormerModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class GraphormerPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class GroundingDinoForObjectDetection(metaclass=DummyObject): _backends = ["torch"] @@ -4404,34 +4811,6 @@ class JetMoePreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) -class JukeboxModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class JukeboxPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class JukeboxPrior(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class JukeboxVQVAE(metaclass=DummyObject): - _backends = ["torch"] - - def 
__init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class Kosmos2ForConditionalGeneration(metaclass=DummyObject): _backends = ["torch"] @@ -5125,62 +5504,6 @@ class MBartPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) -class MegaForCausalLM(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class MegaForMaskedLM(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class MegaForMultipleChoice(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class MegaForQuestionAnswering(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class MegaForSequenceClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class MegaForTokenClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class MegaModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class MegaPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class MegatronBertForCausalLM(metaclass=DummyObject): _backends = ["torch"] @@ -5830,97 +6153,6 @@ class MvpPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) -class NatBackbone(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class NatForImageClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class NatModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class NatPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class NezhaForMaskedLM(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class NezhaForMultipleChoice(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class NezhaForNextSentencePrediction(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class NezhaForPreTraining(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class NezhaForQuestionAnswering(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class NezhaForSequenceClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class NezhaForTokenClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class NezhaModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, 
*args, **kwargs): - requires_backends(self, ["torch"]) - - -class NezhaPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class NllbMoeForConditionalGeneration(metaclass=DummyObject): _backends = ["torch"] @@ -6716,80 +6948,6 @@ class PvtV2PreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) -class QDQBertForMaskedLM(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class QDQBertForMultipleChoice(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class QDQBertForNextSentencePrediction(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class QDQBertForQuestionAnswering(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class QDQBertForSequenceClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class QDQBertForTokenClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class QDQBertLayer(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class QDQBertLMHeadModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class QDQBertModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class QDQBertPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -def load_tf_weights_in_qdqbert(*args, **kwargs): - requires_backends(load_tf_weights_in_qdqbert, ["torch"]) - - class Qwen2ForCausalLM(metaclass=DummyObject): _backends = ["torch"] @@ -6888,59 +7046,6 @@ class RagTokenForGeneration(metaclass=DummyObject): requires_backends(self, ["torch"]) -class RealmEmbedder(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class RealmForOpenQA(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class RealmKnowledgeAugEncoder(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class RealmPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class RealmReader(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class RealmRetriever(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class RealmScorer(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -def load_tf_weights_in_realm(*args, **kwargs): - requires_backends(load_tf_weights_in_realm, ["torch"]) - - class RecurrentGemmaForCausalLM(metaclass=DummyObject): 
_backends = ["torch"] @@ -7716,20 +7821,6 @@ class Speech2TextPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) -class Speech2Text2ForCausalLM(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class Speech2Text2PreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class SpeechT5ForSpeechToSpeech(metaclass=DummyObject): _backends = ["torch"] @@ -8277,34 +8368,6 @@ class TrOCRPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) -class TvltForAudioVisualClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class TvltForPreTraining(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class TvltModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class TvltPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class TvpForVideoGrounding(metaclass=DummyObject): _backends = ["torch"] @@ -8725,27 +8788,6 @@ class ViTPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) -class ViTHybridForImageClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class ViTHybridModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class ViTHybridPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class ViTMAEForPreTraining(metaclass=DummyObject): _backends = ["torch"] @@ -9194,48 +9236,6 @@ class XLMWithLMHeadModel(metaclass=DummyObject): requires_backends(self, ["torch"]) -class XLMProphetNetDecoder(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class XLMProphetNetEncoder(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class XLMProphetNetForCausalLM(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class XLMProphetNetForConditionalGeneration(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class XLMProphetNetModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class XLMProphetNetPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class XLMRobertaForCausalLM(metaclass=DummyObject): _backends = ["torch"] diff --git a/src/transformers/utils/dummy_sentencepiece_objects.py b/src/transformers/utils/dummy_sentencepiece_objects.py index 33ee907a74..8977b4f51b 100644 --- a/src/transformers/utils/dummy_sentencepiece_objects.py +++ b/src/transformers/utils/dummy_sentencepiece_objects.py @@ -72,6 +72,13 @@ class ErnieMTokenizer(metaclass=DummyObject): requires_backends(self, 
["sentencepiece"]) +class XLMProphetNetTokenizer(metaclass=DummyObject): + _backends = ["sentencepiece"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["sentencepiece"]) + + class FNetTokenizer(metaclass=DummyObject): _backends = ["sentencepiece"] @@ -233,13 +240,6 @@ class XGLMTokenizer(metaclass=DummyObject): requires_backends(self, ["sentencepiece"]) -class XLMProphetNetTokenizer(metaclass=DummyObject): - _backends = ["sentencepiece"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["sentencepiece"]) - - class XLMRobertaTokenizer(metaclass=DummyObject): _backends = ["sentencepiece"] diff --git a/src/transformers/utils/dummy_tf_objects.py b/src/transformers/utils/dummy_tf_objects.py index 337b0938b3..942a7afced 100644 --- a/src/transformers/utils/dummy_tf_objects.py +++ b/src/transformers/utils/dummy_tf_objects.py @@ -1038,6 +1038,34 @@ class TFDeiTPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["tf"]) +class TFEfficientFormerForImageClassification(metaclass=DummyObject): + _backends = ["tf"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["tf"]) + + +class TFEfficientFormerForImageClassificationWithTeacher(metaclass=DummyObject): + _backends = ["tf"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["tf"]) + + +class TFEfficientFormerModel(metaclass=DummyObject): + _backends = ["tf"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["tf"]) + + +class TFEfficientFormerPreTrainedModel(metaclass=DummyObject): + _backends = ["tf"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["tf"]) + + class TFAdaptiveEmbedding(metaclass=DummyObject): _backends = ["tf"] @@ -1178,34 +1206,6 @@ class TFDPRReader(metaclass=DummyObject): requires_backends(self, ["tf"]) -class TFEfficientFormerForImageClassification(metaclass=DummyObject): - _backends = ["tf"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["tf"]) - - -class TFEfficientFormerForImageClassificationWithTeacher(metaclass=DummyObject): - _backends = ["tf"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["tf"]) - - -class TFEfficientFormerModel(metaclass=DummyObject): - _backends = ["tf"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["tf"]) - - -class TFEfficientFormerPreTrainedModel(metaclass=DummyObject): - _backends = ["tf"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["tf"]) - - class TFElectraForMaskedLM(metaclass=DummyObject): _backends = ["tf"] diff --git a/src/transformers/utils/dummy_tokenizers_objects.py b/src/transformers/utils/dummy_tokenizers_objects.py index 0b7ddf119d..df83e6fa64 100644 --- a/src/transformers/utils/dummy_tokenizers_objects.py +++ b/src/transformers/utils/dummy_tokenizers_objects.py @@ -121,6 +121,13 @@ class DebertaV2TokenizerFast(metaclass=DummyObject): requires_backends(self, ["tokenizers"]) +class RealmTokenizerFast(metaclass=DummyObject): + _backends = ["tokenizers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["tokenizers"]) + + class RetriBertTokenizerFast(metaclass=DummyObject): _backends = ["tokenizers"] @@ -352,13 +359,6 @@ class Qwen2TokenizerFast(metaclass=DummyObject): requires_backends(self, ["tokenizers"]) -class RealmTokenizerFast(metaclass=DummyObject): - _backends = ["tokenizers"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["tokenizers"]) - - class ReformerTokenizerFast(metaclass=DummyObject): _backends = ["tokenizers"] diff --git 
a/src/transformers/utils/dummy_vision_objects.py b/src/transformers/utils/dummy_vision_objects.py index d32778d4b5..aae31e9e4d 100644 --- a/src/transformers/utils/dummy_vision_objects.py +++ b/src/transformers/utils/dummy_vision_objects.py @@ -142,6 +142,27 @@ class DetaImageProcessor(metaclass=DummyObject): requires_backends(self, ["vision"]) +class EfficientFormerImageProcessor(metaclass=DummyObject): + _backends = ["vision"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["vision"]) + + +class TvltImageProcessor(metaclass=DummyObject): + _backends = ["vision"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["vision"]) + + +class ViTHybridImageProcessor(metaclass=DummyObject): + _backends = ["vision"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["vision"]) + + class DetrFeatureExtractor(metaclass=DummyObject): _backends = ["vision"] @@ -184,13 +205,6 @@ class DPTImageProcessor(metaclass=DummyObject): requires_backends(self, ["vision"]) -class EfficientFormerImageProcessor(metaclass=DummyObject): - _backends = ["vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["vision"]) - - class EfficientNetImageProcessor(metaclass=DummyObject): _backends = ["vision"] @@ -520,13 +534,6 @@ class Swin2SRImageProcessor(metaclass=DummyObject): requires_backends(self, ["vision"]) -class TvltImageProcessor(metaclass=DummyObject): - _backends = ["vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["vision"]) - - class TvpImageProcessor(metaclass=DummyObject): _backends = ["vision"] @@ -590,13 +597,6 @@ class ViTImageProcessor(metaclass=DummyObject): requires_backends(self, ["vision"]) -class ViTHybridImageProcessor(metaclass=DummyObject): - _backends = ["vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["vision"]) - - class VitMatteImageProcessor(metaclass=DummyObject): _backends = ["vision"] diff --git a/tests/models/deta/__init__.py b/tests/models/deta/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/deta/test_image_processing_deta.py b/tests/models/deta/test_image_processing_deta.py deleted file mode 100644 index 3ea5885b0e..0000000000 --- a/tests/models/deta/test_image_processing_deta.py +++ /dev/null @@ -1,535 +0,0 @@ -# coding=utf-8 -# Copyright 2022 HuggingFace Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import json -import pathlib -import unittest - -from transformers.testing_utils import require_torch, require_vision, slow -from transformers.utils import is_torch_available, is_vision_available - -from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs - - -if is_torch_available(): - import torch - -if is_vision_available(): - from PIL import Image - - from transformers import DetaImageProcessor - - -class DetaImageProcessingTester(unittest.TestCase): - def __init__( - self, - parent, - batch_size=7, - num_channels=3, - min_resolution=30, - max_resolution=400, - do_resize=True, - size=None, - do_normalize=True, - image_mean=[0.5, 0.5, 0.5], - image_std=[0.5, 0.5, 0.5], - do_rescale=True, - rescale_factor=1 / 255, - do_pad=True, - ): - # by setting size["longest_edge"] > max_resolution we're effectively not testing this :p - size = size if size is not None else {"shortest_edge": 18, "longest_edge": 1333} - self.parent = parent - self.batch_size = batch_size - self.num_channels = num_channels - self.min_resolution = min_resolution - self.max_resolution = max_resolution - self.do_resize = do_resize - self.size = size - self.do_normalize = do_normalize - self.image_mean = image_mean - self.image_std = image_std - self.do_rescale = do_rescale - self.rescale_factor = rescale_factor - self.do_pad = do_pad - - def prepare_image_processor_dict(self): - return { - "do_resize": self.do_resize, - "size": self.size, - "do_normalize": self.do_normalize, - "image_mean": self.image_mean, - "image_std": self.image_std, - "do_rescale": self.do_rescale, - "rescale_factor": self.rescale_factor, - "do_pad": self.do_pad, - } - - def get_expected_values(self, image_inputs, batched=False): - """ - This function computes the expected height and width when providing images to DetaImageProcessor, - assuming do_resize is set to True with a scalar size. 
- """ - if not batched: - image = image_inputs[0] - if isinstance(image, Image.Image): - w, h = image.size - else: - h, w = image.shape[1], image.shape[2] - if w < h: - expected_height = int(self.size["shortest_edge"] * h / w) - expected_width = self.size["shortest_edge"] - elif w > h: - expected_height = self.size["shortest_edge"] - expected_width = int(self.size["shortest_edge"] * w / h) - else: - expected_height = self.size["shortest_edge"] - expected_width = self.size["shortest_edge"] - - else: - expected_values = [] - for image in image_inputs: - expected_height, expected_width = self.get_expected_values([image]) - expected_values.append((expected_height, expected_width)) - expected_height = max(expected_values, key=lambda item: item[0])[0] - expected_width = max(expected_values, key=lambda item: item[1])[1] - - return expected_height, expected_width - - def expected_output_image_shape(self, images): - height, width = self.get_expected_values(images, batched=True) - return self.num_channels, height, width - - def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): - return prepare_image_inputs( - batch_size=self.batch_size, - num_channels=self.num_channels, - min_resolution=self.min_resolution, - max_resolution=self.max_resolution, - equal_resolution=equal_resolution, - numpify=numpify, - torchify=torchify, - ) - - -@require_torch -@require_vision -class DetaImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase): - image_processing_class = DetaImageProcessor if is_vision_available() else None - - def setUp(self): - self.image_processor_tester = DetaImageProcessingTester(self) - - @property - def image_processor_dict(self): - return self.image_processor_tester.prepare_image_processor_dict() - - def test_image_processor_properties(self): - image_processing = self.image_processing_class(**self.image_processor_dict) - self.assertTrue(hasattr(image_processing, "image_mean")) - self.assertTrue(hasattr(image_processing, "image_std")) - self.assertTrue(hasattr(image_processing, "do_normalize")) - self.assertTrue(hasattr(image_processing, "do_resize")) - self.assertTrue(hasattr(image_processing, "do_rescale")) - self.assertTrue(hasattr(image_processing, "do_pad")) - self.assertTrue(hasattr(image_processing, "size")) - - def test_image_processor_from_dict_with_kwargs(self): - image_processor = self.image_processing_class.from_dict(self.image_processor_dict) - self.assertEqual(image_processor.size, {"shortest_edge": 18, "longest_edge": 1333}) - self.assertEqual(image_processor.do_pad, True) - - @slow - def test_call_pytorch_with_coco_detection_annotations(self): - # prepare image and target - image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") - with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as f: - target = json.loads(f.read()) - - target = {"image_id": 39769, "annotations": target} - - # encode them - image_processing = DetaImageProcessor() - encoding = image_processing(images=image, annotations=target, return_tensors="pt") - - # verify pixel values - expected_shape = torch.Size([1, 3, 800, 1066]) - self.assertEqual(encoding["pixel_values"].shape, expected_shape) - - expected_slice = torch.tensor([0.2796, 0.3138, 0.3481]) - self.assertTrue(torch.allclose(encoding["pixel_values"][0, 0, 0, :3], expected_slice, atol=1e-4)) - - # verify area - expected_area = torch.tensor([5887.9600, 11250.2061, 489353.8438, 837122.7500, 147967.5156, 165732.3438]) - 
self.assertTrue(torch.allclose(encoding["labels"][0]["area"], expected_area)) - # verify boxes - expected_boxes_shape = torch.Size([6, 4]) - self.assertEqual(encoding["labels"][0]["boxes"].shape, expected_boxes_shape) - expected_boxes_slice = torch.tensor([0.5503, 0.2765, 0.0604, 0.2215]) - self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"][0], expected_boxes_slice, atol=1e-3)) - # verify image_id - expected_image_id = torch.tensor([39769]) - self.assertTrue(torch.allclose(encoding["labels"][0]["image_id"], expected_image_id)) - # verify is_crowd - expected_is_crowd = torch.tensor([0, 0, 0, 0, 0, 0]) - self.assertTrue(torch.allclose(encoding["labels"][0]["iscrowd"], expected_is_crowd)) - # verify class_labels - expected_class_labels = torch.tensor([75, 75, 63, 65, 17, 17]) - self.assertTrue(torch.allclose(encoding["labels"][0]["class_labels"], expected_class_labels)) - # verify orig_size - expected_orig_size = torch.tensor([480, 640]) - self.assertTrue(torch.allclose(encoding["labels"][0]["orig_size"], expected_orig_size)) - # verify size - expected_size = torch.tensor([800, 1066]) - self.assertTrue(torch.allclose(encoding["labels"][0]["size"], expected_size)) - - @slow - def test_call_pytorch_with_coco_panoptic_annotations(self): - # prepare image, target and masks_path - image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") - with open("./tests/fixtures/tests_samples/COCO/coco_panoptic_annotations.txt", "r") as f: - target = json.loads(f.read()) - - target = {"file_name": "000000039769.png", "image_id": 39769, "segments_info": target} - - masks_path = pathlib.Path("./tests/fixtures/tests_samples/COCO/coco_panoptic") - - # encode them - image_processing = DetaImageProcessor(format="coco_panoptic") - encoding = image_processing(images=image, annotations=target, masks_path=masks_path, return_tensors="pt") - - # verify pixel values - expected_shape = torch.Size([1, 3, 800, 1066]) - self.assertEqual(encoding["pixel_values"].shape, expected_shape) - - expected_slice = torch.tensor([0.2796, 0.3138, 0.3481]) - self.assertTrue(torch.allclose(encoding["pixel_values"][0, 0, 0, :3], expected_slice, atol=1e-4)) - - # verify area - expected_area = torch.tensor([147979.6875, 165527.0469, 484638.5938, 11292.9375, 5879.6562, 7634.1147]) - self.assertTrue(torch.allclose(encoding["labels"][0]["area"], expected_area)) - # verify boxes - expected_boxes_shape = torch.Size([6, 4]) - self.assertEqual(encoding["labels"][0]["boxes"].shape, expected_boxes_shape) - expected_boxes_slice = torch.tensor([0.2625, 0.5437, 0.4688, 0.8625]) - self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"][0], expected_boxes_slice, atol=1e-3)) - # verify image_id - expected_image_id = torch.tensor([39769]) - self.assertTrue(torch.allclose(encoding["labels"][0]["image_id"], expected_image_id)) - # verify is_crowd - expected_is_crowd = torch.tensor([0, 0, 0, 0, 0, 0]) - self.assertTrue(torch.allclose(encoding["labels"][0]["iscrowd"], expected_is_crowd)) - # verify class_labels - expected_class_labels = torch.tensor([17, 17, 63, 75, 75, 93]) - self.assertTrue(torch.allclose(encoding["labels"][0]["class_labels"], expected_class_labels)) - # verify masks - expected_masks_sum = 822873 - self.assertEqual(encoding["labels"][0]["masks"].sum().item(), expected_masks_sum) - # verify orig_size - expected_orig_size = torch.tensor([480, 640]) - self.assertTrue(torch.allclose(encoding["labels"][0]["orig_size"], expected_orig_size)) - # verify size - expected_size = torch.tensor([800, 1066]) - 
self.assertTrue(torch.allclose(encoding["labels"][0]["size"], expected_size)) - - @slow - # Copied from tests.models.detr.test_image_processing_detr.DetrImageProcessingTest.test_batched_coco_detection_annotations with Detr->Deta - def test_batched_coco_detection_annotations(self): - image_0 = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") - image_1 = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png").resize((800, 800)) - - with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as f: - target = json.loads(f.read()) - - annotations_0 = {"image_id": 39769, "annotations": target} - annotations_1 = {"image_id": 39769, "annotations": target} - - # Adjust the bounding boxes for the resized image - w_0, h_0 = image_0.size - w_1, h_1 = image_1.size - for i in range(len(annotations_1["annotations"])): - coords = annotations_1["annotations"][i]["bbox"] - new_bbox = [ - coords[0] * w_1 / w_0, - coords[1] * h_1 / h_0, - coords[2] * w_1 / w_0, - coords[3] * h_1 / h_0, - ] - annotations_1["annotations"][i]["bbox"] = new_bbox - - images = [image_0, image_1] - annotations = [annotations_0, annotations_1] - - image_processing = DetaImageProcessor() - encoding = image_processing( - images=images, - annotations=annotations, - return_segmentation_masks=True, - return_tensors="pt", # do_convert_annotations=True - ) - - # Check the pixel values have been padded - postprocessed_height, postprocessed_width = 800, 1066 - expected_shape = torch.Size([2, 3, postprocessed_height, postprocessed_width]) - self.assertEqual(encoding["pixel_values"].shape, expected_shape) - - # Check the bounding boxes have been adjusted for padded images - self.assertEqual(encoding["labels"][0]["boxes"].shape, torch.Size([6, 4])) - self.assertEqual(encoding["labels"][1]["boxes"].shape, torch.Size([6, 4])) - expected_boxes_0 = torch.tensor( - [ - [0.6879, 0.4609, 0.0755, 0.3691], - [0.2118, 0.3359, 0.2601, 0.1566], - [0.5011, 0.5000, 0.9979, 1.0000], - [0.5010, 0.5020, 0.9979, 0.9959], - [0.3284, 0.5944, 0.5884, 0.8112], - [0.8394, 0.5445, 0.3213, 0.9110], - ] - ) - expected_boxes_1 = torch.tensor( - [ - [0.4130, 0.2765, 0.0453, 0.2215], - [0.1272, 0.2016, 0.1561, 0.0940], - [0.3757, 0.4933, 0.7488, 0.9865], - [0.3759, 0.5002, 0.7492, 0.9955], - [0.1971, 0.5456, 0.3532, 0.8646], - [0.5790, 0.4115, 0.3430, 0.7161], - ] - ) - self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1e-3)) - self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1e-3)) - - # Check the masks have also been padded - self.assertEqual(encoding["labels"][0]["masks"].shape, torch.Size([6, 800, 1066])) - self.assertEqual(encoding["labels"][1]["masks"].shape, torch.Size([6, 800, 1066])) - - # Check if do_convert_annotations=False, then the annotations are not converted to centre_x, centre_y, width, height - # format and not in the range [0, 1] - encoding = image_processing( - images=images, - annotations=annotations, - return_segmentation_masks=True, - do_convert_annotations=False, - return_tensors="pt", - ) - self.assertEqual(encoding["labels"][0]["boxes"].shape, torch.Size([6, 4])) - self.assertEqual(encoding["labels"][1]["boxes"].shape, torch.Size([6, 4])) - # Convert to absolute coordinates - unnormalized_boxes_0 = torch.vstack( - [ - expected_boxes_0[:, 0] * postprocessed_width, - expected_boxes_0[:, 1] * postprocessed_height, - expected_boxes_0[:, 2] * postprocessed_width, - expected_boxes_0[:, 3] * postprocessed_height, - ] - ).T - 
unnormalized_boxes_1 = torch.vstack( - [ - expected_boxes_1[:, 0] * postprocessed_width, - expected_boxes_1[:, 1] * postprocessed_height, - expected_boxes_1[:, 2] * postprocessed_width, - expected_boxes_1[:, 3] * postprocessed_height, - ] - ).T - # Convert from centre_x, centre_y, width, height to x_min, y_min, x_max, y_max - expected_boxes_0 = torch.vstack( - [ - unnormalized_boxes_0[:, 0] - unnormalized_boxes_0[:, 2] / 2, - unnormalized_boxes_0[:, 1] - unnormalized_boxes_0[:, 3] / 2, - unnormalized_boxes_0[:, 0] + unnormalized_boxes_0[:, 2] / 2, - unnormalized_boxes_0[:, 1] + unnormalized_boxes_0[:, 3] / 2, - ] - ).T - expected_boxes_1 = torch.vstack( - [ - unnormalized_boxes_1[:, 0] - unnormalized_boxes_1[:, 2] / 2, - unnormalized_boxes_1[:, 1] - unnormalized_boxes_1[:, 3] / 2, - unnormalized_boxes_1[:, 0] + unnormalized_boxes_1[:, 2] / 2, - unnormalized_boxes_1[:, 1] + unnormalized_boxes_1[:, 3] / 2, - ] - ).T - self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1)) - self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1)) - - # Copied from tests.models.detr.test_image_processing_detr.DetrImageProcessingTest.test_batched_coco_panoptic_annotations with Detr->Deta - def test_batched_coco_panoptic_annotations(self): - # prepare image, target and masks_path - image_0 = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") - image_1 = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png").resize((800, 800)) - - with open("./tests/fixtures/tests_samples/COCO/coco_panoptic_annotations.txt", "r") as f: - target = json.loads(f.read()) - - annotation_0 = {"file_name": "000000039769.png", "image_id": 39769, "segments_info": target} - annotation_1 = {"file_name": "000000039769.png", "image_id": 39769, "segments_info": target} - - w_0, h_0 = image_0.size - w_1, h_1 = image_1.size - for i in range(len(annotation_1["segments_info"])): - coords = annotation_1["segments_info"][i]["bbox"] - new_bbox = [ - coords[0] * w_1 / w_0, - coords[1] * h_1 / h_0, - coords[2] * w_1 / w_0, - coords[3] * h_1 / h_0, - ] - annotation_1["segments_info"][i]["bbox"] = new_bbox - - masks_path = pathlib.Path("./tests/fixtures/tests_samples/COCO/coco_panoptic") - - images = [image_0, image_1] - annotations = [annotation_0, annotation_1] - - # encode them - image_processing = DetaImageProcessor(format="coco_panoptic") - encoding = image_processing( - images=images, - annotations=annotations, - masks_path=masks_path, - return_tensors="pt", - return_segmentation_masks=True, - ) - - # Check the pixel values have been padded - postprocessed_height, postprocessed_width = 800, 1066 - expected_shape = torch.Size([2, 3, postprocessed_height, postprocessed_width]) - self.assertEqual(encoding["pixel_values"].shape, expected_shape) - - # Check the bounding boxes have been adjusted for padded images - self.assertEqual(encoding["labels"][0]["boxes"].shape, torch.Size([6, 4])) - self.assertEqual(encoding["labels"][1]["boxes"].shape, torch.Size([6, 4])) - expected_boxes_0 = torch.tensor( - [ - [0.2625, 0.5437, 0.4688, 0.8625], - [0.7719, 0.4104, 0.4531, 0.7125], - [0.5000, 0.4927, 0.9969, 0.9854], - [0.1688, 0.2000, 0.2063, 0.0917], - [0.5492, 0.2760, 0.0578, 0.2187], - [0.4992, 0.4990, 0.9984, 0.9979], - ] - ) - expected_boxes_1 = torch.tensor( - [ - [0.1576, 0.3262, 0.2814, 0.5175], - [0.4634, 0.2463, 0.2720, 0.4275], - [0.3002, 0.2956, 0.5985, 0.5913], - [0.1013, 0.1200, 0.1238, 0.0550], - [0.3297, 0.1656, 0.0347, 0.1312], - [0.2997, 0.2994, 
0.5994, 0.5987], - ] - ) - self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1e-3)) - self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1e-3)) - - # Check the masks have also been padded - self.assertEqual(encoding["labels"][0]["masks"].shape, torch.Size([6, 800, 1066])) - self.assertEqual(encoding["labels"][1]["masks"].shape, torch.Size([6, 800, 1066])) - - # Check if do_convert_annotations=False, then the annotations are not converted to centre_x, centre_y, width, height - # format and not in the range [0, 1] - encoding = image_processing( - images=images, - annotations=annotations, - masks_path=masks_path, - return_segmentation_masks=True, - do_convert_annotations=False, - return_tensors="pt", - ) - self.assertEqual(encoding["labels"][0]["boxes"].shape, torch.Size([6, 4])) - self.assertEqual(encoding["labels"][1]["boxes"].shape, torch.Size([6, 4])) - # Convert to absolute coordinates - unnormalized_boxes_0 = torch.vstack( - [ - expected_boxes_0[:, 0] * postprocessed_width, - expected_boxes_0[:, 1] * postprocessed_height, - expected_boxes_0[:, 2] * postprocessed_width, - expected_boxes_0[:, 3] * postprocessed_height, - ] - ).T - unnormalized_boxes_1 = torch.vstack( - [ - expected_boxes_1[:, 0] * postprocessed_width, - expected_boxes_1[:, 1] * postprocessed_height, - expected_boxes_1[:, 2] * postprocessed_width, - expected_boxes_1[:, 3] * postprocessed_height, - ] - ).T - # Convert from centre_x, centre_y, width, height to x_min, y_min, x_max, y_max - expected_boxes_0 = torch.vstack( - [ - unnormalized_boxes_0[:, 0] - unnormalized_boxes_0[:, 2] / 2, - unnormalized_boxes_0[:, 1] - unnormalized_boxes_0[:, 3] / 2, - unnormalized_boxes_0[:, 0] + unnormalized_boxes_0[:, 2] / 2, - unnormalized_boxes_0[:, 1] + unnormalized_boxes_0[:, 3] / 2, - ] - ).T - expected_boxes_1 = torch.vstack( - [ - unnormalized_boxes_1[:, 0] - unnormalized_boxes_1[:, 2] / 2, - unnormalized_boxes_1[:, 1] - unnormalized_boxes_1[:, 3] / 2, - unnormalized_boxes_1[:, 0] + unnormalized_boxes_1[:, 2] / 2, - unnormalized_boxes_1[:, 1] + unnormalized_boxes_1[:, 3] / 2, - ] - ).T - self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1)) - self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1)) - - # Copied from tests.models.detr.test_image_processing_detr.DetrImageProcessingTest.test_max_width_max_height_resizing_and_pad_strategy with Detr->Deta - def test_max_width_max_height_resizing_and_pad_strategy(self): - image_1 = torch.ones([200, 100, 3], dtype=torch.uint8) - - # do_pad=False, max_height=100, max_width=100, image=200x100 -> 100x50 - image_processor = DetaImageProcessor( - size={"max_height": 100, "max_width": 100}, - do_pad=False, - ) - inputs = image_processor(images=[image_1], return_tensors="pt") - self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 50])) - - # do_pad=False, max_height=300, max_width=100, image=200x100 -> 200x100 - image_processor = DetaImageProcessor( - size={"max_height": 300, "max_width": 100}, - do_pad=False, - ) - inputs = image_processor(images=[image_1], return_tensors="pt") - - # do_pad=True, max_height=100, max_width=100, image=200x100 -> 100x100 - image_processor = DetaImageProcessor( - size={"max_height": 100, "max_width": 100}, do_pad=True, pad_size={"height": 100, "width": 100} - ) - inputs = image_processor(images=[image_1], return_tensors="pt") - self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 100])) - - # 
do_pad=True, max_height=300, max_width=100, image=200x100 -> 300x100 - image_processor = DetaImageProcessor( - size={"max_height": 300, "max_width": 100}, - do_pad=True, - pad_size={"height": 301, "width": 101}, - ) - inputs = image_processor(images=[image_1], return_tensors="pt") - self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 301, 101])) - - ### Check for batch - image_2 = torch.ones([100, 150, 3], dtype=torch.uint8) - - # do_pad=True, max_height=150, max_width=100, images=[200x100, 100x150] -> 150x100 - image_processor = DetaImageProcessor( - size={"max_height": 150, "max_width": 100}, - do_pad=True, - pad_size={"height": 150, "width": 100}, - ) - inputs = image_processor(images=[image_1, image_2], return_tensors="pt") - self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100])) diff --git a/tests/models/deta/test_modeling_deta.py b/tests/models/deta/test_modeling_deta.py deleted file mode 100644 index fa840212a5..0000000000 --- a/tests/models/deta/test_modeling_deta.py +++ /dev/null @@ -1,671 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Testing suite for the PyTorch DETA model.""" - -import collections -import inspect -import math -import re -import unittest - -from transformers import DetaConfig, ResNetConfig, is_torch_available, is_torchvision_available, is_vision_available -from transformers.file_utils import cached_property -from transformers.testing_utils import require_torchvision, require_vision, slow, torch_device - -from ...generation.test_utils import GenerationTesterMixin -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_torch_available(): - import torch - - from transformers.pytorch_utils import id_tensor_storage - -if is_torchvision_available(): - from transformers import DetaForObjectDetection, DetaModel - - -if is_vision_available(): - from PIL import Image - - from transformers import AutoImageProcessor - - -class DetaModelTester: - def __init__( - self, - parent, - batch_size=8, - is_training=True, - use_labels=True, - hidden_size=32, - num_hidden_layers=2, - num_attention_heads=8, - intermediate_size=4, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - num_queries=12, - two_stage_num_proposals=12, - num_channels=3, - image_size=224, - n_targets=8, - num_labels=91, - num_feature_levels=4, - encoder_n_points=2, - decoder_n_points=6, - two_stage=True, - assign_first_stage=True, - assign_second_stage=True, - ): - self.parent = parent - self.batch_size = batch_size - self.is_training = is_training - self.use_labels = use_labels - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob 
= hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.num_queries = num_queries - self.two_stage_num_proposals = two_stage_num_proposals - self.num_channels = num_channels - self.image_size = image_size - self.n_targets = n_targets - self.num_labels = num_labels - self.num_feature_levels = num_feature_levels - self.encoder_n_points = encoder_n_points - self.decoder_n_points = decoder_n_points - self.two_stage = two_stage - self.assign_first_stage = assign_first_stage - self.assign_second_stage = assign_second_stage - - # we also set the expected seq length for both encoder and decoder - self.encoder_seq_length = ( - math.ceil(self.image_size / 8) ** 2 - + math.ceil(self.image_size / 16) ** 2 - + math.ceil(self.image_size / 32) ** 2 - + math.ceil(self.image_size / 64) ** 2 - ) - self.decoder_seq_length = self.num_queries - - def prepare_config_and_inputs(self, model_class_name): - pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) - - pixel_mask = torch.ones([self.batch_size, self.image_size, self.image_size], device=torch_device) - - labels = None - if self.use_labels: - # labels is a list of Dict (each Dict being the labels for a given example in the batch) - labels = [] - for i in range(self.batch_size): - target = {} - target["class_labels"] = torch.randint( - high=self.num_labels, size=(self.n_targets,), device=torch_device - ) - target["boxes"] = torch.rand(self.n_targets, 4, device=torch_device) - target["masks"] = torch.rand(self.n_targets, self.image_size, self.image_size, device=torch_device) - labels.append(target) - - config = self.get_config(model_class_name) - return config, pixel_values, pixel_mask, labels - - def get_config(self, model_class_name): - resnet_config = ResNetConfig( - num_channels=3, - embeddings_size=10, - hidden_sizes=[10, 20, 30, 40], - depths=[1, 1, 2, 1], - hidden_act="relu", - num_labels=3, - out_features=["stage2", "stage3", "stage4"], - out_indices=[2, 3, 4], - ) - two_stage = model_class_name == "DetaForObjectDetection" - assign_first_stage = model_class_name == "DetaForObjectDetection" - assign_second_stage = model_class_name == "DetaForObjectDetection" - return DetaConfig( - d_model=self.hidden_size, - encoder_layers=self.num_hidden_layers, - decoder_layers=self.num_hidden_layers, - encoder_attention_heads=self.num_attention_heads, - decoder_attention_heads=self.num_attention_heads, - encoder_ffn_dim=self.intermediate_size, - decoder_ffn_dim=self.intermediate_size, - dropout=self.hidden_dropout_prob, - attention_dropout=self.attention_probs_dropout_prob, - num_queries=self.num_queries, - two_stage_num_proposals=self.two_stage_num_proposals, - num_labels=self.num_labels, - num_feature_levels=self.num_feature_levels, - encoder_n_points=self.encoder_n_points, - decoder_n_points=self.decoder_n_points, - two_stage=two_stage, - assign_first_stage=assign_first_stage, - assign_second_stage=assign_second_stage, - backbone_config=resnet_config, - backbone=None, - ) - - def prepare_config_and_inputs_for_common(self, model_class_name="DetaModel"): - config, pixel_values, pixel_mask, labels = self.prepare_config_and_inputs(model_class_name) - inputs_dict = {"pixel_values": pixel_values, "pixel_mask": pixel_mask} - return config, inputs_dict - - def create_and_check_deta_model(self, config, pixel_values, pixel_mask, labels): - model = DetaModel(config=config) - model.to(torch_device) - model.eval() - - result = model(pixel_values=pixel_values, pixel_mask=pixel_mask) - 
result = model(pixel_values) - - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.num_queries, self.hidden_size)) - - def create_and_check_deta_freeze_backbone(self, config, pixel_values, pixel_mask, labels): - model = DetaModel(config=config) - model.to(torch_device) - model.eval() - - model.freeze_backbone() - - for _, param in model.backbone.model.named_parameters(): - self.parent.assertEqual(False, param.requires_grad) - - def create_and_check_deta_unfreeze_backbone(self, config, pixel_values, pixel_mask, labels): - model = DetaModel(config=config) - model.to(torch_device) - model.eval() - - model.unfreeze_backbone() - - for _, param in model.backbone.model.named_parameters(): - self.parent.assertEqual(True, param.requires_grad) - - def create_and_check_deta_object_detection_head_model(self, config, pixel_values, pixel_mask, labels): - model = DetaForObjectDetection(config=config) - model.to(torch_device) - model.eval() - - result = model(pixel_values=pixel_values, pixel_mask=pixel_mask) - result = model(pixel_values) - - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.two_stage_num_proposals, self.num_labels)) - self.parent.assertEqual(result.pred_boxes.shape, (self.batch_size, self.two_stage_num_proposals, 4)) - - result = model(pixel_values=pixel_values, pixel_mask=pixel_mask, labels=labels) - - self.parent.assertEqual(result.loss.shape, ()) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.two_stage_num_proposals, self.num_labels)) - self.parent.assertEqual(result.pred_boxes.shape, (self.batch_size, self.two_stage_num_proposals, 4)) - - -@require_torchvision -class DetaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): - all_model_classes = (DetaModel, DetaForObjectDetection) if is_torchvision_available() else () - pipeline_model_mapping = ( - {"image-feature-extraction": DetaModel, "object-detection": DetaForObjectDetection} - if is_torchvision_available() - else {} - ) - is_encoder_decoder = True - test_torchscript = False - test_pruning = False - test_head_masking = False - test_missing_keys = False - - # TODO: Fix the failed tests when this model gets more usage - def is_pipeline_test_to_skip( - self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name - ): - if pipeline_test_casse_name == "ObjectDetectionPipelineTests": - return True - - return False - - @unittest.skip("Skip for now. PR #22437 causes some loading issue. 
See (not merged) #22656 for some discussions.") - def test_can_use_safetensors(self): - super().test_can_use_safetensors() - - # special case for head models - def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): - inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) - - if return_labels: - if model_class.__name__ == "DetaForObjectDetection": - labels = [] - for i in range(self.model_tester.batch_size): - target = {} - target["class_labels"] = torch.ones( - size=(self.model_tester.n_targets,), device=torch_device, dtype=torch.long - ) - target["boxes"] = torch.ones( - self.model_tester.n_targets, 4, device=torch_device, dtype=torch.float - ) - target["masks"] = torch.ones( - self.model_tester.n_targets, - self.model_tester.image_size, - self.model_tester.image_size, - device=torch_device, - dtype=torch.float, - ) - labels.append(target) - inputs_dict["labels"] = labels - - return inputs_dict - - def setUp(self): - self.model_tester = DetaModelTester(self) - self.config_tester = ConfigTester(self, config_class=DetaConfig, has_text_modality=False) - - def test_config(self): - # we don't test common_properties and arguments_init as these don't apply for DETA - self.config_tester.create_and_test_config_to_json_string() - self.config_tester.create_and_test_config_to_json_file() - self.config_tester.create_and_test_config_from_and_save_pretrained() - self.config_tester.create_and_test_config_with_num_labels() - self.config_tester.check_config_can_be_init_without_params() - - def test_deta_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs(model_class_name="DetaModel") - self.model_tester.create_and_check_deta_model(*config_and_inputs) - - def test_deta_freeze_backbone(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs(model_class_name="DetaModel") - self.model_tester.create_and_check_deta_freeze_backbone(*config_and_inputs) - - def test_deta_unfreeze_backbone(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs(model_class_name="DetaModel") - self.model_tester.create_and_check_deta_unfreeze_backbone(*config_and_inputs) - - def test_deta_object_detection_head_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs(model_class_name="DetaForObjectDetection") - self.model_tester.create_and_check_deta_object_detection_head_model(*config_and_inputs) - - @unittest.skip(reason="DETA does not use inputs_embeds") - def test_inputs_embeds(self): - pass - - @unittest.skip(reason="DETA does not use inputs_embeds") - def test_inputs_embeds_matches_input_ids(self): - pass - - @unittest.skip(reason="DETA does not have a get_input_embeddings method") - def test_model_common_attributes(self): - pass - - @unittest.skip(reason="DETA is not a generative model") - def test_generate_without_input_ids(self): - pass - - @unittest.skip(reason="DETA does not use token embeddings") - def test_resize_tokens_embeddings(self): - pass - - @unittest.skip(reason="Feed forward chunking is not implemented") - def test_feed_forward_chunking(self): - pass - - def test_attention_outputs(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True - - for model_class in self.all_model_classes: - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = False - config.return_dict = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = 
model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.encoder_attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - # check that output_attentions also work using config - del inputs_dict["output_attentions"] - config.output_attentions = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.encoder_attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - self.assertListEqual( - list(attentions[0].shape[-3:]), - [ - self.model_tester.num_attention_heads, - self.model_tester.num_feature_levels, - self.model_tester.encoder_n_points, - ], - ) - out_len = len(outputs) - - correct_outlen = 8 - - # loss is at first position - if "labels" in inputs_dict: - correct_outlen += 1 # loss is added to beginning - # Object Detection model returns pred_logits and pred_boxes - if model_class.__name__ == "DetaForObjectDetection": - correct_outlen += 2 - - self.assertEqual(out_len, correct_outlen) - - # decoder attentions - decoder_attentions = outputs.decoder_attentions - self.assertIsInstance(decoder_attentions, (list, tuple)) - self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(decoder_attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, self.model_tester.num_queries, self.model_tester.num_queries], - ) - - # cross attentions - cross_attentions = outputs.cross_attentions - self.assertIsInstance(cross_attentions, (list, tuple)) - self.assertEqual(len(cross_attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(cross_attentions[0].shape[-3:]), - [ - self.model_tester.num_attention_heads, - self.model_tester.num_feature_levels, - self.model_tester.decoder_n_points, - ], - ) - - # Check attention is always last and order is fine - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - if hasattr(self.model_tester, "num_hidden_states_types"): - added_hidden_states = self.model_tester.num_hidden_states_types - elif self.is_encoder_decoder: - added_hidden_states = 2 - else: - added_hidden_states = 1 - self.assertEqual(out_len + added_hidden_states, len(outputs)) - - self_attentions = outputs.encoder_attentions - - self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(self_attentions[0].shape[-3:]), - [ - self.model_tester.num_attention_heads, - self.model_tester.num_feature_levels, - self.model_tester.encoder_n_points, - ], - ) - - # removed retain_grad and grad on decoder_hidden_states, as queries don't require grad - def test_retain_grad_hidden_states_attentions(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.output_hidden_states = True - config.output_attentions = True - - # no need to test all models as different heads yield the same functionality - model_class = self.all_model_classes[0] - model = model_class(config) - model.to(torch_device) - - inputs = self._prepare_for_class(inputs_dict, model_class) - - outputs = model(**inputs) - - # we take the second output since last_hidden_state is the second item - output = outputs[1] - - encoder_hidden_states = 
outputs.encoder_hidden_states[0] - encoder_attentions = outputs.encoder_attentions[0] - encoder_hidden_states.retain_grad() - encoder_attentions.retain_grad() - - decoder_attentions = outputs.decoder_attentions[0] - decoder_attentions.retain_grad() - - cross_attentions = outputs.cross_attentions[0] - cross_attentions.retain_grad() - - output.flatten()[0].backward(retain_graph=True) - - self.assertIsNotNone(encoder_hidden_states.grad) - self.assertIsNotNone(encoder_attentions.grad) - self.assertIsNotNone(decoder_attentions.grad) - self.assertIsNotNone(cross_attentions.grad) - - def test_forward_auxiliary_loss(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.auxiliary_loss = True - - # only test for object detection and segmentation model - for model_class in self.all_model_classes[1:]: - model = model_class(config) - model.to(torch_device) - - inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) - - outputs = model(**inputs) - - self.assertIsNotNone(outputs.auxiliary_outputs) - self.assertEqual(len(outputs.auxiliary_outputs), self.model_tester.num_hidden_layers - 1) - - def test_forward_signature(self): - config, _ = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - model = model_class(config) - signature = inspect.signature(model.forward) - # signature.parameters is an OrderedDict => so arg_names order is deterministic - arg_names = [*signature.parameters.keys()] - - if model.config.is_encoder_decoder: - expected_arg_names = ["pixel_values", "pixel_mask"] - expected_arg_names.extend( - ["head_mask", "decoder_head_mask", "encoder_outputs"] - if "head_mask" and "decoder_head_mask" in arg_names - else [] - ) - self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names) - else: - expected_arg_names = ["pixel_values", "pixel_mask"] - self.assertListEqual(arg_names[:1], expected_arg_names) - - @unittest.skip(reason="Model doesn't use tied weights") - def test_tied_model_weights_key_ignore(self): - pass - - def test_initialization(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - configs_no_init = _config_zero_init(config) - for model_class in self.all_model_classes: - model = model_class(config=configs_no_init) - # Skip the check for the backbone - for name, module in model.named_modules(): - if module.__class__.__name__ == "DetaBackboneWithPositionalEncodings": - backbone_params = [f"{name}.{key}" for key in module.state_dict().keys()] - break - - for name, param in model.named_parameters(): - if param.requires_grad: - if ( - "level_embed" in name - or "sampling_offsets.bias" in name - or "value_proj" in name - or "output_proj" in name - or "reference_points" in name - or name in backbone_params - ): - continue - self.assertIn( - ((param.data.mean() * 1e9).round() / 1e9).item(), - [0.0, 1.0], - msg=f"Parameter {name} of model {model_class} seems not properly initialized", - ) - - @unittest.skip("No support for low_cpu_mem_usage=True.") - def test_save_load_low_cpu_mem_usage(self): - pass - - @unittest.skip("No support for low_cpu_mem_usage=True.") - def test_save_load_low_cpu_mem_usage_checkpoints(self): - pass - - @unittest.skip("No support for low_cpu_mem_usage=True.") - def test_save_load_low_cpu_mem_usage_no_safetensors(self): - pass - - # Inspired by tests.test_modeling_common.ModelTesterMixin.test_tied_weights_keys - def test_tied_weights_keys(self): - for model_class in self.all_model_classes: - # We 
need to pass model class name to correctly initialize the config. - # If we don't pass it, the config for `DetaForObjectDetection`` will be initialized - # with `two_stage=False` and the test will fail because for that case `class_embed` - # weights are not tied. - config, _ = self.model_tester.prepare_config_and_inputs_for_common(model_class_name=model_class.__name__) - config.tie_word_embeddings = True - - model_tied = model_class(config) - - ptrs = collections.defaultdict(list) - for name, tensor in model_tied.state_dict().items(): - ptrs[id_tensor_storage(tensor)].append(name) - - # These are all the pointers of shared tensors. - tied_params = [names for _, names in ptrs.items() if len(names) > 1] - - tied_weight_keys = model_tied._tied_weights_keys if model_tied._tied_weights_keys is not None else [] - # Detect we get a hit for each key - for key in tied_weight_keys: - is_tied_key = any(re.search(key, p) for group in tied_params for p in group) - self.assertTrue(is_tied_key, f"{key} is not a tied weight key for {model_class}.") - - # Removed tied weights found from tied params -> there should only be one left after - for key in tied_weight_keys: - for i in range(len(tied_params)): - tied_params[i] = [p for p in tied_params[i] if re.search(key, p) is None] - - tied_params = [group for group in tied_params if len(group) > 1] - self.assertListEqual( - tied_params, - [], - f"Missing `_tied_weights_keys` for {model_class}: add all of {tied_params} except one.", - ) - - -TOLERANCE = 1e-4 - - -# We will verify our results on an image of cute cats -def prepare_img(): - image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") - return image - - -@require_torchvision -@require_vision -@slow -class DetaModelIntegrationTests(unittest.TestCase): - @cached_property - def default_image_processor(self): - return AutoImageProcessor.from_pretrained("jozhang97/deta-resnet-50") if is_vision_available() else None - - def test_inference_object_detection_head(self): - model = DetaForObjectDetection.from_pretrained("jozhang97/deta-resnet-50").to(torch_device) - - image_processor = self.default_image_processor - image = prepare_img() - inputs = image_processor(images=image, return_tensors="pt").to(torch_device) - - with torch.no_grad(): - outputs = model(**inputs) - - expected_shape_logits = torch.Size((1, 300, model.config.num_labels)) - self.assertEqual(outputs.logits.shape, expected_shape_logits) - - expected_logits = torch.tensor( - [[-7.3978, -2.5406, -4.1668], [-8.2684, -3.9933, -3.8096], [-7.0515, -3.7973, -5.8516]] - ).to(torch_device) - expected_boxes = torch.tensor( - [[0.5043, 0.4973, 0.9998], [0.2542, 0.5489, 0.4748], [0.5490, 0.2765, 0.0570]] - ).to(torch_device) - - self.assertTrue(torch.allclose(outputs.logits[0, :3, :3], expected_logits, atol=1e-4)) - - expected_shape_boxes = torch.Size((1, 300, 4)) - self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes) - self.assertTrue(torch.allclose(outputs.pred_boxes[0, :3, :3], expected_boxes, atol=1e-4)) - - # verify postprocessing - results = image_processor.post_process_object_detection( - outputs, threshold=0.3, target_sizes=[image.size[::-1]] - )[0] - expected_scores = torch.tensor([0.6392, 0.6276, 0.5546, 0.5260, 0.4706], device=torch_device) - expected_labels = [75, 17, 17, 75, 63] - expected_slice_boxes = torch.tensor([40.5866, 73.2107, 176.1421, 117.1751], device=torch_device) - - self.assertTrue(torch.allclose(results["scores"], expected_scores, atol=1e-4)) - self.assertSequenceEqual(results["labels"].tolist(), 
expected_labels) - self.assertTrue(torch.allclose(results["boxes"][0, :], expected_slice_boxes)) - - def test_inference_object_detection_head_swin_backbone(self): - model = DetaForObjectDetection.from_pretrained("jozhang97/deta-swin-large").to(torch_device) - - image_processor = self.default_image_processor - image = prepare_img() - inputs = image_processor(images=image, return_tensors="pt").to(torch_device) - - with torch.no_grad(): - outputs = model(**inputs) - - expected_shape_logits = torch.Size((1, 300, model.config.num_labels)) - self.assertEqual(outputs.logits.shape, expected_shape_logits) - - expected_logits = torch.tensor( - [[-7.6308, -2.8485, -5.3737], [-7.2037, -4.5505, -4.8027], [-7.2943, -4.2611, -4.6617]] - ).to(torch_device) - expected_boxes = torch.tensor( - [[0.4987, 0.4969, 0.9999], [0.2549, 0.5498, 0.4805], [0.5498, 0.2757, 0.0569]] - ).to(torch_device) - - self.assertTrue(torch.allclose(outputs.logits[0, :3, :3], expected_logits, atol=1e-4)) - - expected_shape_boxes = torch.Size((1, 300, 4)) - self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes) - self.assertTrue(torch.allclose(outputs.pred_boxes[0, :3, :3], expected_boxes, atol=1e-4)) - - # verify postprocessing - results = image_processor.post_process_object_detection( - outputs, threshold=0.3, target_sizes=[image.size[::-1]] - )[0] - expected_scores = torch.tensor([0.6831, 0.6826, 0.5684, 0.5464, 0.4392], device=torch_device) - expected_labels = [17, 17, 75, 75, 63] - expected_slice_boxes = torch.tensor([345.8478, 23.6754, 639.8562, 372.8265], device=torch_device) - - self.assertTrue(torch.allclose(results["scores"], expected_scores, atol=1e-4)) - self.assertSequenceEqual(results["labels"].tolist(), expected_labels) - self.assertTrue(torch.allclose(results["boxes"][0, :], expected_slice_boxes)) diff --git a/tests/models/efficientformer/__init__.py b/tests/models/efficientformer/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/efficientformer/test_image_processing_efficientformer.py b/tests/models/efficientformer/test_image_processing_efficientformer.py deleted file mode 100644 index bd91b77106..0000000000 --- a/tests/models/efficientformer/test_image_processing_efficientformer.py +++ /dev/null @@ -1,99 +0,0 @@ -# coding=utf-8 -# Copyright 2021 HuggingFace Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import unittest - -from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_vision_available - -from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs - - -if is_vision_available(): - from transformers import ViTImageProcessor - - -class EfficientFormerImageProcessorTester(unittest.TestCase): - def __init__( - self, - parent, - batch_size=13, - num_channels=3, - image_size=224, - min_resolution=30, - max_resolution=400, - do_resize=True, - size=None, - do_normalize=True, - image_mean=[0.5, 0.5, 0.5], - image_std=[0.5, 0.5, 0.5], - ): - size = size if size is not None else {"height": 18, "width": 18} - self.parent = parent - self.batch_size = batch_size - self.num_channels = num_channels - self.image_size = image_size - self.min_resolution = min_resolution - self.max_resolution = max_resolution - self.do_resize = do_resize - self.size = size - self.do_normalize = do_normalize - self.image_mean = image_mean - self.image_std = image_std - - def prepare_image_processor_dict(self): - return { - "image_mean": self.image_mean, - "image_std": self.image_std, - "do_normalize": self.do_normalize, - "do_resize": self.do_resize, - "size": self.size, - } - - def expected_output_image_shape(self, images): - return self.num_channels, self.size["height"], self.size["width"] - - def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): - return prepare_image_inputs( - batch_size=self.batch_size, - num_channels=self.num_channels, - min_resolution=self.min_resolution, - max_resolution=self.max_resolution, - equal_resolution=equal_resolution, - numpify=numpify, - torchify=torchify, - ) - - -@require_torch -@require_vision -class EfficientFormerImageProcessorTest(ImageProcessingTestMixin, unittest.TestCase): - image_processing_class = ViTImageProcessor if is_vision_available() else None - - def setUp(self): - self.image_processor_tester = EfficientFormerImageProcessorTester(self) - - @property - def image_processor_dict(self): - return self.image_processor_tester.prepare_image_processor_dict() - - def test_image_proc_properties(self): - image_processor = self.image_processing_class(**self.image_processor_dict) - self.assertTrue(hasattr(image_processor, "image_mean")) - self.assertTrue(hasattr(image_processor, "image_std")) - self.assertTrue(hasattr(image_processor, "do_normalize")) - self.assertTrue(hasattr(image_processor, "do_resize")) - self.assertTrue(hasattr(image_processor, "size")) diff --git a/tests/models/efficientformer/test_modeling_efficientformer.py b/tests/models/efficientformer/test_modeling_efficientformer.py deleted file mode 100644 index 6b7ce810ce..0000000000 --- a/tests/models/efficientformer/test_modeling_efficientformer.py +++ /dev/null @@ -1,478 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Testing suite for the PyTorch EfficientFormer model.""" - -import unittest -import warnings -from typing import List - -from transformers import EfficientFormerConfig -from transformers.testing_utils import require_torch, require_vision, slow, torch_device -from transformers.utils import cached_property, is_torch_available, is_vision_available - -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_torch_available(): - import torch - - from transformers import ( - EfficientFormerForImageClassification, - EfficientFormerForImageClassificationWithTeacher, - EfficientFormerModel, - ) - from transformers.models.auto.modeling_auto import ( - MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES, - MODEL_MAPPING_NAMES, - ) - - -if is_vision_available(): - from PIL import Image - - from transformers import EfficientFormerImageProcessor - - -class EfficientFormerModelTester: - def __init__( - self, - parent, - batch_size: int = 13, - image_size: int = 64, - patch_size: int = 2, - embed_dim: int = 3, - num_channels: int = 3, - is_training: bool = True, - use_labels: bool = True, - hidden_size: int = 128, - hidden_sizes=[16, 32, 64, 128], - num_hidden_layers: int = 7, - num_attention_heads: int = 4, - intermediate_size: int = 37, - hidden_act: str = "gelu", - hidden_dropout_prob: float = 0.1, - attention_probs_dropout_prob: float = 0.1, - type_sequence_label_size: int = 10, - initializer_range: float = 0.02, - encoder_stride: int = 2, - num_attention_outputs: int = 1, - dim: int = 128, - depths: List[int] = [2, 2, 2, 2], - resolution: int = 2, - mlp_expansion_ratio: int = 2, - ): - self.parent = parent - self.batch_size = batch_size - self.image_size = image_size - self.patch_size = patch_size - self.num_channels = num_channels - self.is_training = is_training - self.use_labels = use_labels - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.encoder_stride = encoder_stride - self.num_attention_outputs = num_attention_outputs - self.embed_dim = embed_dim - self.seq_length = embed_dim + 1 - self.resolution = resolution - self.depths = depths - self.hidden_sizes = hidden_sizes - self.dim = dim - self.mlp_expansion_ratio = mlp_expansion_ratio - - def prepare_config_and_inputs(self): - pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) - - labels = None - if self.use_labels: - labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - - config = self.get_config() - return config, pixel_values, labels - - def get_config(self): - return EfficientFormerConfig( - image_size=self.image_size, - patch_size=self.patch_size, - num_channels=self.num_channels, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - is_decoder=False, - initializer_range=self.initializer_range, - 
encoder_stride=self.encoder_stride, - resolution=self.resolution, - depths=self.depths, - hidden_sizes=self.hidden_sizes, - dim=self.dim, - mlp_expansion_ratio=self.mlp_expansion_ratio, - ) - - def create_and_check_model(self, config, pixel_values, labels): - model = EfficientFormerModel(config=config) - model.to(torch_device) - model.eval() - result = model(pixel_values) - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - - def create_and_check_for_image_classification(self, config, pixel_values, labels): - config.num_labels = self.type_sequence_label_size - model = EfficientFormerForImageClassification(config) - model.to(torch_device) - model.eval() - result = model(pixel_values, labels=labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size)) - - # test greyscale images - config.num_channels = 1 - model = EfficientFormerForImageClassification(config) - model.to(torch_device) - model.eval() - - pixel_values = floats_tensor([self.batch_size, 1, self.image_size, self.image_size]) - result = model(pixel_values) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size)) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - pixel_values, - labels, - ) = config_and_inputs - inputs_dict = {"pixel_values": pixel_values} - return config, inputs_dict - - -@require_torch -class EfficientFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): - """ - Here we also overwrite some of the tests of test_modeling_common.py, as EfficientFormer does not use input_ids, inputs_embeds, - attention_mask and seq_length. - """ - - all_model_classes = ( - ( - EfficientFormerModel, - EfficientFormerForImageClassificationWithTeacher, - EfficientFormerForImageClassification, - ) - if is_torch_available() - else () - ) - pipeline_model_mapping = ( - { - "image-feature-extraction": EfficientFormerModel, - "image-classification": ( - EfficientFormerForImageClassification, - EfficientFormerForImageClassificationWithTeacher, - ), - } - if is_torch_available() - else {} - ) - fx_compatible = False - - test_pruning = False - test_resize_embeddings = False - test_head_masking = False - - def setUp(self): - self.model_tester = EfficientFormerModelTester(self) - self.config_tester = ConfigTester( - self, config_class=EfficientFormerConfig, has_text_modality=False, hidden_size=37 - ) - - def test_config(self): - self.config_tester.run_common_tests() - - @unittest.skip(reason="EfficientFormer does not use inputs_embeds") - def test_inputs_embeds(self): - pass - - @unittest.skip(reason="EfficientFormer does not support input and output embeddings") - def test_model_common_attributes(self): - pass - - def test_hidden_states_output(self): - def check_hidden_states_output(inputs_dict, config, model_class): - model = model_class(config) - model.to(torch_device) - model.eval() - - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states - - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 - ) - self.assertEqual(len(hidden_states), expected_num_layers) - - if hasattr(self.model_tester, "encoder_seq_length"): - seq_length = self.model_tester.encoder_seq_length - if hasattr(self.model_tester, 
"chunk_length") and self.model_tester.chunk_length > 1: - seq_length = seq_length * self.model_tester.chunk_length - else: - seq_length = self.model_tester.seq_length - - self.assertListEqual( - list(hidden_states[-1].shape[-2:]), - [seq_length, self.model_tester.hidden_size], - ) - - if config.is_encoder_decoder: - hidden_states = outputs.decoder_hidden_states - - self.assertIsInstance(hidden_states, (list, tuple)) - self.assertEqual(len(hidden_states), expected_num_layers) - seq_len = getattr(self.model_tester, "seq_length", None) - decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", seq_len) - - self.assertListEqual( - list(hidden_states[-1].shape[-2:]), - [decoder_seq_length, self.model_tester.hidden_size], - ) - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - inputs_dict["output_hidden_states"] = True - check_hidden_states_output(inputs_dict, config, model_class) - - # check that output_hidden_states also work using config - del inputs_dict["output_hidden_states"] - config.output_hidden_states = True - - check_hidden_states_output(inputs_dict, config, model_class) - - def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): - inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) - - if return_labels: - if model_class.__name__ == "EfficientFormerForImageClassificationWithTeacher": - del inputs_dict["labels"] - - return inputs_dict - - def test_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_model(*config_and_inputs) - - @unittest.skip(reason="EfficientFormer does not implement masked image modeling yet") - def test_for_masked_image_modeling(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_masked_image_modeling(*config_and_inputs) - - def test_for_image_classification(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_image_classification(*config_and_inputs) - - # special case for EfficientFormerForImageClassificationWithTeacher model - def test_training(self): - if not self.model_tester.is_training: - return - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True - - for model_class in self.all_model_classes: - # EfficientFormerForImageClassificationWithTeacher supports inference-only - if ( - model_class.__name__ in MODEL_MAPPING_NAMES.values() - or model_class.__name__ == "EfficientFormerForImageClassificationWithTeacher" - ): - continue - model = model_class(config) - model.to(torch_device) - model.train() - inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) - loss = model(**inputs).loss - loss.backward() - - def test_problem_types(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - problem_types = [ - {"title": "multi_label_classification", "num_labels": 2, "dtype": torch.float}, - {"title": "single_label_classification", "num_labels": 1, "dtype": torch.long}, - {"title": "regression", "num_labels": 1, "dtype": torch.float}, - ] - - for model_class in self.all_model_classes: - if ( - model_class.__name__ - not in [ - *MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES.values(), - ] - or model_class.__name__ == "EfficientFormerForImageClassificationWithTeacher" - ): - continue - - for problem_type in 
problem_types:
-                with self.subTest(msg=f"Testing {model_class} with {problem_type['title']}"):
-                    config.problem_type = problem_type["title"]
-                    config.num_labels = problem_type["num_labels"]
-
-                    model = model_class(config)
-                    model.to(torch_device)
-                    model.train()
-
-                    inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
-
-                    if problem_type["num_labels"] > 1:
-                        inputs["labels"] = inputs["labels"].unsqueeze(1).repeat(1, problem_type["num_labels"])
-
-                    inputs["labels"] = inputs["labels"].to(problem_type["dtype"])
-
-                    # This tests that we do not trigger the warning from PyTorch "Using a target size that is different
-                    # to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure
-                    # they have the same size." which is a symptom that something is wrong for the regression problem.
-                    # See https://github.com/huggingface/transformers/issues/11780
-                    with warnings.catch_warnings(record=True) as warning_list:
-                        loss = model(**inputs).loss
-                    for w in warning_list:
-                        if "Using a target size that is different to the input size" in str(w.message):
-                            raise ValueError(
-                                f"Something is going wrong in the regression problem: intercepted {w.message}"
-                            )
-
-                    loss.backward()
-
-    @slow
-    def test_model_from_pretrained(self):
-        model_name = "snap-research/efficientformer-l1-300"
-        model = EfficientFormerModel.from_pretrained(model_name)
-        self.assertIsNotNone(model)
-
-    def test_attention_outputs(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.return_dict = True
-
-        seq_len = getattr(self.model_tester, "seq_length", None)
-        encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", seq_len)
-        encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length)
-        chunk_length = getattr(self.model_tester, "chunk_length", None)
-
-        if chunk_length is not None and hasattr(self.model_tester, "num_hashes"):
-            encoder_seq_length = encoder_seq_length * self.model_tester.num_hashes
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = False
-            config.return_dict = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_attention_outputs)
-
-            # check that output_attentions also work using config
-            del inputs_dict["output_attentions"]
-            config.output_attentions = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_attention_outputs)
-
-            if chunk_length is not None:
-                self.assertListEqual(
-                    list(attentions[0].shape[-4:]),
-                    [self.model_tester.num_attention_heads, encoder_seq_length, chunk_length, encoder_key_length],
-                )
-            else:
-                self.assertListEqual(
-                    list(attentions[0].shape[-3:]),
-                    [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
-                )
-
-
-# We will verify our results on an image of cute cats
-def prepare_img():
-    image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
-    return image
-
-
-@require_torch
-@require_vision
-class
EfficientFormerModelIntegrationTest(unittest.TestCase): - @cached_property - def default_image_processor(self): - return ( - EfficientFormerImageProcessor.from_pretrained("snap-research/efficientformer-l1-300") - if is_vision_available() - else None - ) - - @slow - def test_inference_image_classification_head(self): - model = EfficientFormerForImageClassification.from_pretrained("snap-research/efficientformer-l1-300").to( - torch_device - ) - - image_processor = self.default_image_processor - image = prepare_img() - inputs = image_processor(images=image, return_tensors="pt").to(torch_device) - - # forward pass - with torch.no_grad(): - outputs = model(**inputs) - - # verify the logits - expected_shape = (1, 1000) - self.assertEqual(outputs.logits.shape, expected_shape) - - expected_slice = torch.tensor([-0.0555, 0.4825, -0.0852]).to(torch_device) - self.assertTrue(torch.allclose(outputs.logits[0][:3], expected_slice, atol=1e-4)) - - @slow - def test_inference_image_classification_head_with_teacher(self): - model = EfficientFormerForImageClassificationWithTeacher.from_pretrained( - "snap-research/efficientformer-l1-300" - ).to(torch_device) - - image_processor = self.default_image_processor - image = prepare_img() - inputs = image_processor(images=image, return_tensors="pt").to(torch_device) - - # forward pass - with torch.no_grad(): - outputs = model(**inputs) - - # verify the logits - expected_shape = (1, 1000) - self.assertEqual(outputs.logits.shape, expected_shape) - - expected_slice = torch.tensor([-0.1312, 0.4353, -1.0499]).to(torch_device) - self.assertTrue(torch.allclose(outputs.logits[0][:3], expected_slice, atol=1e-4)) diff --git a/tests/models/efficientformer/test_modeling_tf_efficientformer.py b/tests/models/efficientformer/test_modeling_tf_efficientformer.py deleted file mode 100644 index abb0878740..0000000000 --- a/tests/models/efficientformer/test_modeling_tf_efficientformer.py +++ /dev/null @@ -1,409 +0,0 @@ -# coding=utf-8 -# Copyright 2023 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Testing suite for the TensorFlow EfficientFormer model.""" - -import inspect -import unittest -from typing import List - -import numpy as np - -from transformers import EfficientFormerConfig -from transformers.testing_utils import require_tf, require_vision, slow -from transformers.utils import cached_property, is_tf_available, is_vision_available - -from ...test_configuration_common import ConfigTester -from ...test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_tf_available(): - import tensorflow as tf - - from transformers import ( - TFEfficientFormerForImageClassification, - TFEfficientFormerForImageClassificationWithTeacher, - TFEfficientFormerModel, - ) - from transformers.modeling_tf_utils import keras - - -if is_vision_available(): - from PIL import Image - - from transformers import EfficientFormerImageProcessor - - -class TFEfficientFormerModelTester: - def __init__( - self, - parent, - batch_size: int = 13, - image_size: int = 64, - patch_size: int = 2, - embed_dim: int = 3, - num_channels: int = 3, - is_training: bool = True, - use_labels: bool = True, - hidden_size: int = 128, - hidden_sizes=[16, 32, 64, 128], - num_hidden_layers: int = 7, - num_attention_heads: int = 4, - intermediate_size: int = 37, - hidden_act: str = "gelu", - hidden_dropout_prob: float = 0.1, - attention_probs_dropout_prob: float = 0.1, - type_sequence_label_size: int = 10, - initializer_range: float = 0.02, - encoder_stride: int = 2, - num_attention_outputs: int = 1, - dim: int = 128, - depths: List[int] = [2, 2, 2, 2], - resolution: int = 2, - mlp_expansion_ratio: int = 2, - ): - self.parent = parent - self.batch_size = batch_size - self.image_size = image_size - self.patch_size = patch_size - self.num_channels = num_channels - self.is_training = is_training - self.use_labels = use_labels - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.encoder_stride = encoder_stride - self.num_attention_outputs = num_attention_outputs - self.embed_dim = embed_dim - self.seq_length = embed_dim + 1 - self.resolution = resolution - self.depths = depths - self.hidden_sizes = hidden_sizes - self.dim = dim - self.mlp_expansion_ratio = mlp_expansion_ratio - - def prepare_config_and_inputs(self): - pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) - - labels = None - if self.use_labels: - labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - - config = self.get_config() - - return config, pixel_values, labels - - def get_config(self): - return EfficientFormerConfig( - image_size=self.image_size, - patch_size=self.patch_size, - num_channels=self.num_channels, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - is_decoder=False, - initializer_range=self.initializer_range, - encoder_stride=self.encoder_stride, - 
resolution=self.resolution, - depths=self.depths, - hidden_sizes=self.hidden_sizes, - dim=self.dim, - mlp_expansion_ratio=self.mlp_expansion_ratio, - ) - - def create_and_check_model(self, config, pixel_values, labels): - model = TFEfficientFormerModel(config=config) - result = model(pixel_values, training=False) - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - - def create_and_check_for_image_classification(self, config, pixel_values, labels): - config.num_labels = self.type_sequence_label_size - model = TFEfficientFormerForImageClassification(config) - result = model(pixel_values, labels=labels, training=False) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size)) - - # test greyscale images - config.num_channels = 1 - model = TFEfficientFormerForImageClassification(config) - - pixel_values = floats_tensor([self.batch_size, 1, self.image_size, self.image_size]) - result = model(pixel_values, labels=labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size)) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - config, pixel_values, labels = config_and_inputs - inputs_dict = {"pixel_values": pixel_values} - return config, inputs_dict - - -@require_tf -class TFEfficientFormerModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase): - """ - Here we also overwrite some of the tests of test_modeling_tf_common.py, as EfficientFormer does not use input_ids, - inputs_embeds, attention_mask and seq_length. - """ - - all_model_classes = ( - ( - TFEfficientFormerModel, - TFEfficientFormerForImageClassificationWithTeacher, - TFEfficientFormerForImageClassification, - ) - if is_tf_available() - else () - ) - pipeline_model_mapping = ( - { - "feature-extraction": TFEfficientFormerModel, - "image-classification": ( - TFEfficientFormerForImageClassification, - TFEfficientFormerForImageClassificationWithTeacher, - ), - } - if is_tf_available() - else {} - ) - - fx_compatible = False - - test_pruning = False - test_resize_embeddings = False - test_head_masking = False - test_onnx = False - - def setUp(self): - self.model_tester = TFEfficientFormerModelTester(self) - self.config_tester = ConfigTester( - self, config_class=EfficientFormerConfig, has_text_modality=False, hidden_size=37 - ) - - def test_config(self): - self.config_tester.run_common_tests() - - @unittest.skip(reason="EfficientFormer does not use inputs_embeds") - def test_inputs_embeds(self): - pass - - @unittest.skip(reason="EfficientFormer does not support input and output embeddings") - def test_model_common_attributes(self): - pass - - def test_forward_signature(self): - config, _ = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - model = model_class(config) - signature = inspect.signature(model.call) - # signature.parameters is an OrderedDict => so arg_names order is deterministic - arg_names = [*signature.parameters.keys()] - - expected_arg_names = ["pixel_values"] - self.assertListEqual(arg_names[:1], expected_arg_names) - - def test_hidden_states_output(self): - def check_hidden_states_output(inputs_dict, config, model_class): - model = model_class(config) - - outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False) - hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states - - 
            expected_num_layers = getattr(
-                self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
-            )
-            self.assertEqual(len(hidden_states), expected_num_layers)
-
-            if hasattr(self.model_tester, "encoder_seq_length"):
-                seq_length = self.model_tester.encoder_seq_length
-                if hasattr(self.model_tester, "chunk_length") and self.model_tester.chunk_length > 1:
-                    seq_length = seq_length * self.model_tester.chunk_length
-            else:
-                seq_length = self.model_tester.seq_length
-
-            self.assertListEqual(
-                list(hidden_states[-1].shape[-2:]),
-                [seq_length, self.model_tester.hidden_size],
-            )
-
-            if config.is_encoder_decoder:
-                hidden_states = outputs.decoder_hidden_states
-
-                self.assertIsInstance(hidden_states, (list, tuple))
-                self.assertEqual(len(hidden_states), expected_num_layers)
-                seq_len = getattr(self.model_tester, "seq_length", None)
-                decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", seq_len)
-
-                self.assertListEqual(
-                    list(hidden_states[-1].shape[-2:]),
-                    [decoder_seq_length, self.model_tester.hidden_size],
-                )
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_hidden_states"] = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
-            # check that output_hidden_states also work using config
-            del inputs_dict["output_hidden_states"]
-            config.output_hidden_states = True
-
-            check_hidden_states_output(inputs_dict, config, model_class)
-
-    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
-        inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
-
-        if return_labels:
-            if model_class.__name__ == "TFEfficientFormerForImageClassificationWithTeacher":
-                del inputs_dict["labels"]
-
-        return inputs_dict
-
-    def test_model(self):
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_model(*config_and_inputs)
-
-    @unittest.skip(reason="EfficientFormer does not implement masked image modeling yet")
-    def test_for_masked_image_modeling(self):
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_for_masked_image_modeling(*config_and_inputs)
-
-    def test_for_image_classification(self):
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
-
-    @slow
-    def test_model_from_pretrained(self):
-        model_name = "snap-research/efficientformer-l1-300"
-        model = TFEfficientFormerModel.from_pretrained(model_name)
-        self.assertIsNotNone(model)
-
-    def test_attention_outputs(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.return_dict = True
-
-        seq_len = getattr(self.model_tester, "seq_length", None)
-        encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", seq_len)
-        encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length)
-        chunk_length = getattr(self.model_tester, "chunk_length", None)
-
-        if chunk_length is not None and hasattr(self.model_tester, "num_hashes"):
-            encoder_seq_length = encoder_seq_length * self.model_tester.num_hashes
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = False
-            config.return_dict = True
-            model = model_class(config)
-
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False)
attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_attention_outputs) - - # check that output_attentions also work using config - del inputs_dict["output_attentions"] - config.output_attentions = True - model = model_class(config) - outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False) - - attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_attention_outputs) - - if chunk_length is not None: - self.assertListEqual( - list(attentions[0].shape[-4:]), - [self.model_tester.num_attention_heads, encoder_seq_length, chunk_length, encoder_key_length], - ) - else: - self.assertListEqual( - list(attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length], - ) - - def test_compile_tf_model(self): - # We use a simplified version of this test for EfficientFormer because it requires training=False - # and Keras refuses to let us force that during functional construction - config, _ = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - # Prepare our model - model = model_class(config) - # These are maximally general inputs for the model, with multiple None dimensions - # Hopefully this will catch any conditionals that fail for flexible shapes - functional_inputs = { - key: keras.Input(shape=val.shape[1:], dtype=val.dtype, name=key) - for key, val in model.input_signature.items() - if key in model.dummy_inputs - } - outputs_dict = model(functional_inputs) - self.assertTrue(outputs_dict is not None) - - -# We will verify our results on an image of cute cats -def prepare_img(): - image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") - return image - - -@require_tf -@require_vision -class EfficientFormerModelIntegrationTest(unittest.TestCase): - @cached_property - def default_image_processor(self): - return ( - EfficientFormerImageProcessor.from_pretrained("snap-research/efficientformer-l1-300") - if is_vision_available() - else None - ) - - @slow - def test_inference_image_classification_head(self): - model = TFEfficientFormerForImageClassification.from_pretrained("snap-research/efficientformer-l1-300") - image_processor = self.default_image_processor - image = prepare_img() - inputs = image_processor(images=image, return_tensors="tf") - # forward pass - outputs = model(**inputs, training=False) - # verify the logits - expected_shape = tf.TensorShape((1, 1000)) - self.assertEqual(outputs.logits.shape, expected_shape) - expected_slice = tf.constant([-0.0555, 0.4825, -0.0852]) - self.assertTrue(np.allclose(outputs.logits[0, :3], expected_slice, atol=1e-4)) - - @slow - def test_inference_image_classification_head_with_teacher(self): - model = TFEfficientFormerForImageClassificationWithTeacher.from_pretrained( - "snap-research/efficientformer-l1-300" - ) - image_processor = self.default_image_processor - image = prepare_img() - inputs = image_processor(images=image, return_tensors="tf") - # forward pass - outputs = model(**inputs, training=False) - # verify the logits - expected_shape = tf.TensorShape((1, 1000)) - self.assertEqual(outputs.logits.shape, expected_shape) - expected_slice = tf.constant([-0.1312, 0.4353, -1.0499]) - self.assertTrue(np.allclose(outputs.logits[0, :3], expected_slice, atol=1e-4)) diff --git a/tests/models/ernie_m/__init__.py 
b/tests/models/ernie_m/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/ernie_m/test_modeling_ernie_m.py b/tests/models/ernie_m/test_modeling_ernie_m.py deleted file mode 100644 index 17c9aa89f3..0000000000 --- a/tests/models/ernie_m/test_modeling_ernie_m.py +++ /dev/null @@ -1,323 +0,0 @@ -# coding=utf-8 -# Copyright 2023 The HuggingFace Inc. and Baidu team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Testing suite for the PyTorch ErnieM model.""" - -import unittest - -from transformers import ErnieMConfig, is_torch_available -from transformers.testing_utils import require_torch, slow, torch_device - -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_torch_available(): - import torch - - from transformers import ( - ErnieMForInformationExtraction, - ErnieMForMultipleChoice, - ErnieMForQuestionAnswering, - ErnieMForSequenceClassification, - ErnieMForTokenClassification, - ErnieMModel, - ) - - -class ErnieMModelTester: - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=2, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = random_attention_mask([self.batch_size, self.seq_length]) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = 
self.get_config() - - return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - - def prepare_config_and_inputs_for_uiem(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = random_attention_mask([self.batch_size, self.seq_length]) - config = self.get_config() - - return config, input_ids, input_mask - - def get_config(self): - return ErnieMConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - initializer_range=self.initializer_range, - ) - - def create_and_check_model(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): - model = ErnieMModel(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, return_dict=True) - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - - def create_and_check_for_question_answering( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = ErnieMForQuestionAnswering(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - start_positions=sequence_labels, - end_positions=sequence_labels, - ) - self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) - self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) - - def create_and_check_for_information_extraction( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = ErnieMForInformationExtraction(config=config) - model.to(torch_device) - model.eval() - sequence_labels = torch.ones_like(input_ids, dtype=torch.float32) - result = model( - input_ids, - attention_mask=input_mask, - start_positions=sequence_labels, - end_positions=sequence_labels, - ) - self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) - self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) - - def create_and_check_for_sequence_classification( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = ErnieMForSequenceClassification(config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, labels=sequence_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels)) - - def create_and_check_for_token_classification( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = ErnieMForTokenClassification(config=config) - model.to(torch_device) - model.eval() - input_ids.to(torch_device) - input_mask.to(torch_device) - token_labels.to(torch_device) - - result = model(input_ids, attention_mask=input_mask, labels=token_labels) - - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) - - def create_and_check_for_multiple_choice( - self, config, input_ids, input_mask, sequence_labels, token_labels, 
choice_labels - ): - config.num_choices = self.num_choices - model = ErnieMForMultipleChoice(config=config) - model.to(torch_device) - model.eval() - multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - result = model( - multiple_choice_inputs_ids, - attention_mask=multiple_choice_input_mask, - labels=choice_labels, - ) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices)) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask} - return config, inputs_dict - - -@require_torch -class ErnieMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): - all_model_classes = ( - ( - ErnieMModel, - ErnieMForMultipleChoice, - ErnieMForQuestionAnswering, - ErnieMForSequenceClassification, - ErnieMForTokenClassification, - ) - if is_torch_available() - else () - ) - all_generative_model_classes = () - pipeline_model_mapping = ( - { - "feature-extraction": ErnieMModel, - "question-answering": ErnieMForQuestionAnswering, - "text-classification": ErnieMForSequenceClassification, - "token-classification": ErnieMForTokenClassification, - "zero-shot": ErnieMForSequenceClassification, - } - if is_torch_available() - else {} - ) - test_torchscript = False - - # TODO: Fix the failed tests when this model gets more usage - def is_pipeline_test_to_skip( - self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name - ): - if pipeline_test_casse_name == "QAPipelineTests": - return True - - return False - - def setUp(self): - self.model_tester = ErnieMModelTester(self) - self.config_tester = ConfigTester(self, config_class=ErnieMConfig, hidden_size=37) - - def test_config(self): - self.config_tester.run_common_tests() - - def test_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_model(*config_and_inputs) - - def test_model_various_embeddings(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - for type in ["absolute", "relative_key", "relative_key_query"]: - config_and_inputs[0].position_embedding_type = type - self.model_tester.create_and_check_model(*config_and_inputs) - - def test_for_multiple_choice(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs) - - def test_for_question_answering(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_question_answering(*config_and_inputs) - - def test_for_sequence_classification(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs) - - def test_for_information_extraction(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_information_extraction(*config_and_inputs) - - def test_for_token_classification(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_token_classification(*config_and_inputs) - - @slow - def 
test_model_from_pretrained(self): - model_name = "susnato/ernie-m-base_pytorch" - model = ErnieMModel.from_pretrained(model_name) - self.assertIsNotNone(model) - - -@require_torch -class ErnieMModelIntegrationTest(unittest.TestCase): - @slow - def test_inference_model(self): - model = ErnieMModel.from_pretrained("susnato/ernie-m-base_pytorch") - model.eval() - input_ids = torch.tensor([[0, 1, 2, 3, 4, 5]]) - output = model(input_ids)[0] - - # TODO Replace vocab size - hidden_size = 768 - - expected_shape = torch.Size((1, 6, hidden_size)) - self.assertEqual(output.shape, expected_shape) - - expected_slice = torch.tensor( - [[[-0.0012, 0.1245, -0.0214], [-0.0742, 0.0244, -0.0771], [-0.0333, 0.1164, -0.1554]]] - ) - - self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3)) diff --git a/tests/models/ernie_m/test_tokenization_ernie_m.py b/tests/models/ernie_m/test_tokenization_ernie_m.py deleted file mode 100644 index 01de7d3731..0000000000 --- a/tests/models/ernie_m/test_tokenization_ernie_m.py +++ /dev/null @@ -1,143 +0,0 @@ -# coding=utf-8 -# Copyright 2023 The HuggingFace Inc. and Baidu team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Testing suite for the PyTorch ErnieM model.""" - -import unittest - -from transformers import ErnieMTokenizer -from transformers.testing_utils import get_tests_dir, require_sentencepiece, require_tokenizers, slow - -from ...test_tokenization_common import TokenizerTesterMixin - - -SAMPLE_VOCAB = get_tests_dir("fixtures/spiece.model") - - -@require_sentencepiece -@require_tokenizers -class ErnieMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): - from_pretrained_id = "susnato/ernie-m-base_pytorch" - tokenizer_class = ErnieMTokenizer - test_seq2seq = False - test_sentencepiece = True - test_rust_tokenizer = False - test_sentencepiece_ignore_case = False - - def setUp(self): - super().setUp() - - # We have a SentencePiece fixture for testing - tokenizer = ErnieMTokenizer(SAMPLE_VOCAB, unk_token="", pad_token="") - tokenizer.save_pretrained(self.tmpdirname) - - def get_input_output_texts(self, tokenizer): - input_text = "this is a test" - output_text = "this is a test" - return input_text, output_text - - def test_convert_token_and_id(self): - """Test ``_convert_token_to_id`` and ``_convert_id_to_token``.""" - token = "" - token_id = 0 - - self.assertEqual(self.get_tokenizer()._convert_token_to_id(token), token_id) - self.assertEqual(self.get_tokenizer()._convert_id_to_token(token_id), token) - - def test_get_vocab(self): - vocab_keys = list(self.get_tokenizer().get_vocab().keys()) - - self.assertEqual(vocab_keys[0], "") - self.assertEqual(vocab_keys[1], "") - self.assertEqual(vocab_keys[-1], "▁eloquent") - self.assertEqual(len(vocab_keys), 30_000) - - def test_vocab_size(self): - self.assertEqual(self.get_tokenizer().vocab_size, 30_000) - - def test_rust_and_python_full_tokenizers(self): - if not self.test_rust_tokenizer: - return - - tokenizer = self.get_tokenizer() - rust_tokenizer = 
self.get_rust_tokenizer() - - sequence = "I was born in 92000, and this is falsé." - - tokens = tokenizer.tokenize(sequence) - rust_tokens = rust_tokenizer.tokenize(sequence) - self.assertListEqual(tokens, rust_tokens) - - ids = tokenizer.encode(sequence, add_special_tokens=False) - rust_ids = rust_tokenizer.encode(sequence, add_special_tokens=False) - self.assertListEqual(ids, rust_ids) - - rust_tokenizer = self.get_rust_tokenizer() - ids = tokenizer.encode(sequence) - rust_ids = rust_tokenizer.encode(sequence) - self.assertListEqual(ids, rust_ids) - - def test_full_tokenizer(self): - tokenizer = ErnieMTokenizer(SAMPLE_VOCAB, do_lower_case=True, unk_token="", pad_token="") - - tokens = tokenizer.tokenize("This is a test") - self.assertListEqual(tokens, ["▁this", "▁is", "▁a", "▁test"]) - - self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [48, 25, 21, 1289]) - - tokens = tokenizer.tokenize("I was born in 92000, and this is falsé.") - # ErnieMTokenizer(paddlenlp implementation) outputs '9' instead of '_9' so to mimic that '_9' is changed to '9' - self.assertListEqual( - tokens, ["▁i", "▁was", "▁born", "▁in", "9", "2000", ",", "▁and", "▁this", "▁is", "▁fal", "s", "é", "."] - ) - ids = tokenizer.convert_tokens_to_ids(tokens) - self.assertListEqual(ids, [31, 23, 386, 19, 518, 3050, 15, 17, 48, 25, 8256, 18, 1, 9]) - - back_tokens = tokenizer.convert_ids_to_tokens(ids) - self.assertListEqual( - back_tokens, - ["▁i", "▁was", "▁born", "▁in", "9", "2000", ",", "▁and", "▁this", "▁is", "▁fal", "s", "", "."], - ) - - def test_sequence_builders(self): - tokenizer = ErnieMTokenizer(SAMPLE_VOCAB, unk_token="", pad_token="") - - text = tokenizer.encode("sequence builders") - text_2 = tokenizer.encode("multi-sequence build") - - encoded_sentence = tokenizer.build_inputs_with_special_tokens(text) - encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2) - - assert encoded_sentence == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] - assert encoded_pair == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] + [ - tokenizer.sep_token_id - ] + text_2 + [tokenizer.sep_token_id] - - @slow - def test_tokenizer_integration(self): - expected_encoding = {'input_ids': [[0, 11062, 82772, 7, 15, 82772, 538, 51529, 237, 17198, 1290, 206, 9, 215175, 1314, 136, 17198, 1290, 206, 9, 56359, 42, 122009, 9, 16466, 16, 87344, 4537, 9, 4717, 78381, 6, 159958, 7, 15, 24480, 618, 4, 527, 22693, 9, 304, 4, 2777, 24480, 9874, 4, 43523, 594, 4, 803, 18392, 33189, 18, 4, 43523, 24447, 5, 5, 5, 16, 100, 24955, 83658, 9626, 144057, 15, 839, 22335, 16, 136, 24955, 83658, 83479, 15, 39102, 724, 16, 678, 645, 6460, 1328, 4589, 42, 122009, 115774, 23, 3559, 1328, 46876, 7, 136, 53894, 1940, 42227, 41159, 17721, 823, 425, 4, 27512, 98722, 206, 136, 5531, 4970, 919, 17336, 5, 2], [0, 20080, 618, 83, 82775, 47, 479, 9, 1517, 73, 53894, 333, 80581, 110117, 18811, 5256, 1295, 51, 152526, 297, 7986, 390, 124416, 538, 35431, 214, 98, 15044, 25737, 136, 7108, 43701, 23, 756, 135355, 7, 5, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [0, 581, 63773, 119455, 6, 147797, 88203, 7, 645, 70, 21, 3285, 10269, 5, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]} # fmt: skip - - self.tokenizer_integration_test_util( - expected_encoding=expected_encoding, - model_name="susnato/ernie-m-base_pytorch", - sequences=[ - "Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides " - "general-purpose architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet...) for Natural " - "Language Understanding (NLU) and Natural Language Generation (NLG) with over32+ pretrained " - "models in100+ languages and deep interoperability between Jax, PyTorch and TensorFlow.", - "BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly " - "conditioning on both left and right context in all layers.", - "The quick brown fox jumps over the lazy dog.", - ], - ) diff --git a/tests/models/gptsan_japanese/__init__.py b/tests/models/gptsan_japanese/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/gptsan_japanese/test_modeling_gptsan_japanese.py b/tests/models/gptsan_japanese/test_modeling_gptsan_japanese.py deleted file mode 100644 index 8c48bb5017..0000000000 --- a/tests/models/gptsan_japanese/test_modeling_gptsan_japanese.py +++ /dev/null @@ -1,476 +0,0 @@ -# coding=utf-8 -# Copyright 2023 Toshiyuki Sakamoto(tanreinama) and HuggingFace Inc. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import unittest - -import numpy as np - -from transformers import ( - GPTSanJapaneseConfig, - GPTSanJapaneseForConditionalGeneration, - GPTSanJapaneseModel, - GPTSanJapaneseTokenizer, - is_torch_available, -) -from transformers.generation import GenerationConfig -from transformers.testing_utils import require_torch, slow, tooslow, torch_device - -from ...generation.test_utils import GenerationTesterMixin -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, ids_tensor -from ...test_pipeline_mixin import PipelineTesterMixin - - -class GPTSanJapaneseTester: - def __init__( - self, - parent, - vocab_size=99, - batch_size=13, - num_contexts=7, - # For common tests - is_training=True, - hidden_size=32, - ext_size=42, - num_hidden_layers=2, - num_ext_layers=2, - num_attention_heads=4, - num_experts=2, - d_ff=32, - d_ext=80, - d_spout=33, - dropout_rate=0.0, - layer_norm_epsilon=1e-6, - expert_capacity=100, - router_jitter_noise=0.0, - ): - self.vocab_size = vocab_size - self.parent = parent - self.batch_size = batch_size - self.num_contexts = num_contexts - # For common tests - self.seq_length = self.num_contexts - self.is_training = is_training - self.hidden_size = hidden_size - self.num_ext_layers = num_ext_layers - self.ext_size = ext_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.num_experts = num_experts - self.d_ff = d_ff - self.d_ext = d_ext - self.d_spout = d_spout - self.dropout_rate = dropout_rate - self.layer_norm_epsilon = layer_norm_epsilon - self.expert_capacity = expert_capacity - self.router_jitter_noise = router_jitter_noise - - def get_large_model_config(self): - return GPTSanJapaneseConfig.from_pretrained("Tanrei/GPTSAN-japanese") - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - config = self.get_config() - - return (config, input_ids) - - def prepare_config_and_inputs_for_common(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - config = self.get_config() - - return (config, {"input_ids": input_ids}) - - def get_config(self): - return GPTSanJapaneseConfig( - vocab_size=self.vocab_size, - num_contexts=self.seq_length, - d_model=self.hidden_size, - d_ff=self.d_ff, - d_ext=self.d_ext, - d_spout=self.d_spout, - num_switch_layers=self.num_hidden_layers - self.num_ext_layers, - num_ext_layers=self.num_ext_layers, - num_heads=self.num_attention_heads, - num_experts=self.num_experts, - expert_capacity=self.expert_capacity, - dropout_rate=self.dropout_rate, - layer_norm_epsilon=self.layer_norm_epsilon, - router_jitter_noise=self.router_jitter_noise, - ) - - def create_and_check_model( - self, - config, - input_ids, - ): - model = GPTSanJapaneseForConditionalGeneration(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids=input_ids, - ) - self.parent.assertIsNotNone(result) - - -@require_torch -class GPTSanJapaneseTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): - all_model_classes = (GPTSanJapaneseModel,) if is_torch_available() else () - pipeline_model_mapping = ( - { - "conversational": GPTSanJapaneseForConditionalGeneration, - "feature-extraction": GPTSanJapaneseForConditionalGeneration, - "summarization": GPTSanJapaneseForConditionalGeneration, - "text2text-generation": GPTSanJapaneseForConditionalGeneration, - "translation": GPTSanJapaneseForConditionalGeneration, - } - if is_torch_available() - else {} - ) 
- fx_compatible = False - is_encoder_decoder = False - test_pruning = False - test_headmasking = False - test_save_load_fast_init_to_base = False - test_training = False - # The small GPTSAN_JAPANESE model needs higher percentages for CPU/MP tests - model_split_percents = [0.5, 0.8, 0.9] - - # TODO: Fix the failed tests when this model gets more usage - def is_pipeline_test_to_skip( - self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name - ): - if pipeline_test_casse_name == "SummarizationPipelineTests": - # TODO: fix `_reorder_cache` is not implemented for this model - return True - elif pipeline_test_casse_name == "Text2TextGenerationPipelineTests": - # TODO: check this. - return True - - return False - - def setUp(self): - self.model_tester = GPTSanJapaneseTester(self) - self.config_tester = ConfigTester(self, config_class=GPTSanJapaneseConfig, d_model=37) - - def test_config(self): - GPTSanJapaneseConfig() - - def test_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_model(*config_and_inputs) - - @unittest.skip( - reason="skip for now as the computed `max_memory` by `model_split_percents` in the test method will be changed inside `from_pretrained`" - ) - def test_model_parallelism(self): - super().test_model_parallelism() - - @unittest.skip(reason="Gptsan does not use inputs_embeds") - def test_inputs_embeds(self): - pass - - @unittest.skip(reason="Gptsan does not use inputs_embeds") - def test_inputs_embeds_matches_input_ids(self): - pass - - -@require_torch -class GPTSanJapaneseForConditionalGenerationTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase): - all_model_classes = (GPTSanJapaneseForConditionalGeneration,) if is_torch_available() else () - fx_compatible = False - is_encoder_decoder = False - test_pruning = False - test_headmasking = False - # The small GPTSAN_JAPANESE model needs higher percentages for CPU/MP tests - model_split_percents = [0.5, 0.8, 0.9] - - def setUp(self): - self.model_tester = GPTSanJapaneseTester(self) - self.config_tester = ConfigTester(self, config_class=GPTSanJapaneseConfig, d_model=37) - - def test_config(self): - GPTSanJapaneseConfig() - - def test_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_model(*config_and_inputs) - - @unittest.skip( - reason="skip for now as the computed `max_memory` by `model_split_percents` in the test method will be changed inside `from_pretrained`" - ) - def test_model_parallelism(self): - super().test_model_parallelism() - - @unittest.skip(reason="Gptsan does not use inputs_embeds") - def test_inputs_embeds(self): - pass - - @unittest.skip(reason="Gptsan does not use inputs_embeds") - def test_inputs_embeds_matches_input_ids(self): - pass - - @slow - def test_logits(self): - model = GPTSanJapaneseForConditionalGeneration.from_pretrained("Tanrei/GPTSAN-japanese") - tokenizer = GPTSanJapaneseTokenizer.from_pretrained("Tanrei/GPTSAN-japanese") - input_ids = tokenizer.encode("武田信玄は", return_tensors="pt") - outputs = model(input_ids) - output_logits = outputs.logits.detach().cpu().numpy() - # Output of original model created with mesh-tensoflow - # fmt: off - target = [ - [-12.037839889526367, -12.433061599731445, -14.333840370178223, -12.450345993041992, -11.1661376953125, - -11.930137634277344, -10.659740447998047, -12.909574508666992, -13.241043090820312, -13.398579597473145, - -11.107524871826172, -12.3685941696167, 
-22.97943115234375, -10.481067657470703, -12.484030723571777, - -12.807360649108887, -14.769700050354004, -12.233579635620117, -13.428145408630371, -22.624177932739258], - [-7.511149883270264, -8.281851768493652, -7.943127155303955, -7.55021333694458, -6.49869966506958, - -7.586796283721924, -6.978085994720459, -7.839145183563232, -8.21964168548584, -8.695091247558594, - -6.706910610198975, -6.6585798263549805, -19.565698623657227, -5.353842735290527, -8.350686073303223, - -8.039388656616211, -10.856569290161133, -7.75154447555542, -8.819022178649902, -19.51532745361328], - [-9.73066234588623, -10.223922729492188, -9.932981491088867, -11.857836723327637, -7.662626266479492, - -11.13529109954834, -7.765097618103027, -11.472923278808594, -9.543149948120117, -11.905633926391602, - -9.366164207458496, -11.5734281539917, -23.699003219604492, -9.429590225219727, -10.42839241027832, - -10.585240364074707, -10.94771957397461, -11.095416069030762, -10.390240669250488, -23.769372940063477], - [-9.728265762329102, -9.859712600708008, -10.09729290008545, -9.678522109985352, -6.879519939422607, - -9.68487548828125, -4.2803425788879395, -10.018914222717285, -9.308445930480957, -10.63394546508789, - -8.083646774291992, -9.06301498413086, -21.904266357421875, -8.90160846710205, -8.841876029968262, - -11.856719970703125, -12.079398155212402, -11.233753204345703, -10.177338600158691, -21.87256622314453], - [-9.669764518737793, -9.614198684692383, -9.814510345458984, -9.996501922607422, -11.375690460205078, - -10.113405227661133, -10.546867370605469, -10.04369068145752, -10.907809257507324, -10.504216194152832, - -11.129199028015137, -10.151124000549316, -21.96586799621582, -9.086349487304688, -11.730339050292969, - -10.460667610168457, -10.298049926757812, -10.784148216247559, -10.840693473815918, -22.03152847290039], - ] - # fmt: on - target = np.array(target).flatten() - predict = output_logits[0, :, :20].flatten() - - def check(a, b, epsilon=5e-4): - return abs(a - b) < epsilon * max(abs(a), abs(b)) - - self.assertTrue(np.all([check(target[i], predict[i]) for i in range(len(target))])) - - @slow - def test_batch_generation(self): - model = GPTSanJapaneseForConditionalGeneration.from_pretrained("Tanrei/GPTSAN-japanese") - tokenizer = GPTSanJapaneseTokenizer.from_pretrained("Tanrei/GPTSAN-japanese") - model.to(torch_device) - - # set deterministically - generation_config = GenerationConfig.from_pretrained("Tanrei/GPTSAN-japanese") - generation_config.top_k = 1 - - # use different length sentences to test batching - sentences = [ - "甲斐なら武田と言うほど", - "織田信長は、", - ] - - tokenizer.padding_side = "left" - inputs = tokenizer(sentences, return_tensors="pt", padding=True) - input_ids = inputs["input_ids"].to(torch_device) - - self.assertNotEqual(inputs["attention_mask"][0].numpy().tolist(), inputs["attention_mask"][1].numpy().tolist()) - - outputs = model.generate( - input_ids=input_ids, - attention_mask=inputs["attention_mask"].to(torch_device), - max_new_tokens=3, - generation_config=generation_config, - ) - - inputs_non_padded = tokenizer(sentences[0], return_tensors="pt").input_ids.to(torch_device) - output_non_padded = model.generate( - input_ids=inputs_non_padded, max_new_tokens=3, generation_config=generation_config - ) - - inputs_padded = tokenizer(sentences[1], return_tensors="pt").input_ids.to(torch_device) - output_padded = model.generate(input_ids=inputs_padded, max_new_tokens=3, generation_config=generation_config) - - self.assertNotEqual(inputs_non_padded.shape, inputs_padded.shape) - - 
batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True) - non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True) - padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True) - - expected_output_sentence = [ - "甲斐なら武田と言うほど甲斐の武田", - "織田信長は、このような", - ] - self.assertListEqual(expected_output_sentence, batch_out_sentence) - self.assertListEqual(batch_out_sentence, [non_padded_sentence, padded_sentence]) - - @tooslow - def test_sample(self): - model = GPTSanJapaneseForConditionalGeneration.from_pretrained("Tanrei/GPTSAN-japanese") - tokenizer = GPTSanJapaneseTokenizer.from_pretrained("Tanrei/GPTSAN-japanese") - # Output of original model created with mesh-tensoflow - target = [ - ("武田信玄は", 35675), - ("武田信玄は、", 45), - ("武田信玄は、この", 29), - ("武田信玄は、このよう", 30642), - ("武田信玄は、このような", 35680), - ("武田信玄は、このような「", 8640), - ("武田信玄は、このような「武田", 31617), - ("武田信玄は、このような「武田家", 30646), - ("武田信玄は、このような「武田家の", 31617), - ("武田信玄は、このような「武田家の家", 31381), - ] - for input, output in target: - input_ids = tokenizer.encode(input, return_tensors="pt") - outputs = model(input_ids) - output_logits = outputs.logits.detach().cpu().numpy()[0] - output_id = np.argmax(output_logits[-1]) - self.assertEqual(output_id, output) - - @slow - def test_spout_generation(self): - model = GPTSanJapaneseForConditionalGeneration.from_pretrained("Tanrei/GPTSAN-japanese") - tokenizer = GPTSanJapaneseTokenizer.from_pretrained("Tanrei/GPTSAN-japanese") - model.to(torch_device) - - # set deterministically - generation_config = GenerationConfig.from_pretrained("Tanrei/GPTSAN-japanese") - generation_config.top_k = 1 - - input_text = "武田信玄は、" - input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(torch_device) - input_ids_batch = tokenizer([input_text, input_text], return_tensors="pt").input_ids.to(torch_device) - - # spout from uniform and one-hot - - spouts = [ - [0.87882208, 0.38426396, 0.33220248, 0.43890406, 0.16562252, - 0.04803985, 0.211572 , 0.23188473, 0.37153068, 0.7836377 , - 0.02160172, 0.38761719, 0.75290772, 0.90198857, 0.34365777, - 0.64168169, 0.44318471, 0.14575746, 0.92562881, 0.40812148, - 0.29019122, 0.88861599, 0.65524846, 0.43563456, 0.38177187, - 0.70832965, 0.81527892, 0.68832812, 0.38833192, 0.4561522 , - 0.14828817, 0.47248213, 0.54357335, 0.82009566, 0.1338884 , - 0.02755417, 0.19764677, 0.2422084 , 0.04757674, 0.65409606, - 0.0824589 , 0.03304383, 0.94387689, 0.98764509, 0.82433901, - 0.27646741, 0.64907493, 0.76009406, 0.30087915, 0.17904689, - 0.41601714, 0.67046398, 0.10422822, 0.08447374, 0.07354344, - 0.61423565, 0.70284866, 0.7532333 , 0.1972038 , 0.29575659, - 0.90583886, 0.29265307, 0.50000175, 0.70407655, 0.889363 , - 0.81904418, 0.66829128, 0.64468815, 0.56563723, 0.85601875, - 0.94924672, 0.00166762, 0.25220643, 0.74540219, 0.67993247, - 0.1549675 , 0.39385352, 0.92153607, 0.63745931, 0.27759043, - 0.84702295, 0.65904271, 0.58676614, 0.8666936 , 0.39607438, - 0.79954983, 0.42220697, 0.39650381, 0.7849864 , 0.56150201, - 0.15678925, 0.14746032, 0.34542114, 0.47026783, 0.11956489, - 0.25421435, 0.33788901, 0.68934842, 0.36424685, 0.71737898, - 0.38983449, 0.94393779, 0.39575588, 0.36616553, 0.87104665, - 0.64630203, 0.22516905, 0.88270804, 0.15031338, 0.75144345, - 0.46459025, 0.85396454, 0.86355643, 0.65139851, 0.70266061, - 0.30241389, 0.81056497, 0.88865969, 0.38773807, 0.70635849, - 0.90718459, 0.43245789, 0.28000654, 0.45935562, 0.08773519, - 0.9552151 , 0.93901511, 0.22489288], # uniform - [1., 0., 0., 0., 0., 0., 0., 0., 
0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0., 0.], - ] # fmt: skip - - output1 = model.generate( - input_ids=input_ids, - spout=spouts[0], - max_new_tokens=20, - generation_config=generation_config, - ) - - output2 = model.generate( - input_ids=input_ids, - spout=spouts[1], - max_new_tokens=20, - generation_config=generation_config, - ) - - output3 = model.generate( - input_ids=input_ids_batch, - spout=spouts, - max_new_tokens=20, - generation_config=generation_config, - ) - - out1_sentence = tokenizer.decode(output1[0]) - out2_sentence = tokenizer.decode(output2[0]) - batch_out_sentence = tokenizer.batch_decode(output3) - - expected_output_sentence = [ - "武田信玄は、武田氏の滅亡後、武田氏の居城であった甲斐武田氏の居城である", - "武田信玄は、武田家の滅亡を防ぐため、武田家の家臣である武田信虎を討", - ] - self.assertListEqual(expected_output_sentence, batch_out_sentence) - self.assertListEqual(batch_out_sentence, [out1_sentence, out2_sentence]) - - @slow - def test_prefix_lm_generation(self): - model = GPTSanJapaneseForConditionalGeneration.from_pretrained("Tanrei/GPTSAN-japanese") - tokenizer = GPTSanJapaneseTokenizer.from_pretrained("Tanrei/GPTSAN-japanese") - model.to(torch_device) - - # set deterministically - generation_config = GenerationConfig.from_pretrained("Tanrei/GPTSAN-japanese") - generation_config.top_k = 1 - - prefix_text_1 = "武田信玄" - prefix_text_2 = "織田信長" - input_text_1 = "は、" - input_text_2 = "が、" - input_tok_1 = tokenizer(input_text_1, prefix_text=prefix_text_1, return_tensors="pt") - input_tok_2 = tokenizer(input_text_2, prefix_text=prefix_text_2, return_tensors="pt") - input_tok_3 = tokenizer([[prefix_text_1, input_text_1], [prefix_text_2, input_text_2]], return_tensors="pt") - - output1 = model.generate( - input_ids=input_tok_1.input_ids.to(torch_device), - token_type_ids=input_tok_1.token_type_ids.to(torch_device), - max_new_tokens=20, - generation_config=generation_config, - ) - - output2 = model.generate( - input_ids=input_tok_2.input_ids.to(torch_device), - token_type_ids=input_tok_2.token_type_ids.to(torch_device), - max_new_tokens=20, - generation_config=generation_config, - ) - - output3 = model.generate( - input_ids=input_tok_3.input_ids.to(torch_device), - token_type_ids=input_tok_3.token_type_ids.to(torch_device), - attention_mask=input_tok_3.attention_mask.to(torch_device), - max_new_tokens=20, - generation_config=generation_config, - ) - - out1_sentence = tokenizer.decode(output1[0]) - out2_sentence = tokenizer.decode(output2[0]) - batch_out_sentence = tokenizer.batch_decode(output3) - - expected_output_sentence = [ - "武田信玄は、武田氏の祖である武田信虎を、その子・武田信友を擁して", - "織田信長が、織田信長の妻・お市の方を妻として迎えたという逸話が残", - ] - self.assertListEqual(expected_output_sentence, batch_out_sentence) - self.assertListEqual(batch_out_sentence, [out1_sentence, out2_sentence]) diff --git a/tests/models/gptsan_japanese/test_tokenization_gptsan_japanese.py b/tests/models/gptsan_japanese/test_tokenization_gptsan_japanese.py deleted file mode 100644 index 8d989a51a7..0000000000 --- a/tests/models/gptsan_japanese/test_tokenization_gptsan_japanese.py +++ /dev/null @@ -1,218 +0,0 @@ -# coding=utf-8 -# Copyright 2023 Toshiyuki 
Sakamoto(tanreinama) and HuggingFace Inc. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import json -import os -import unittest - -from transformers.models.gptsan_japanese.tokenization_gptsan_japanese import ( - VOCAB_FILES_NAMES, - GPTSanJapaneseTokenizer, -) -from transformers.testing_utils import require_jinja, require_tokenizers, slow - -from ...test_tokenization_common import TokenizerTesterMixin - - -@require_tokenizers -class GPTSanJapaneseTokenizationTest(TokenizerTesterMixin, unittest.TestCase): - from_pretrained_id = "Tanrei/GPTSAN-japanese" - tokenizer_class = GPTSanJapaneseTokenizer - test_rust_tokenizer = False - from_pretrained_kwargs = {"do_clean_text": False, "add_prefix_space": False} - - def setUp(self): - super().setUp() - - vocab_tokens = ["こん", "こんに", "にちは", "ばんは", "世界,㔺界", "、", "。", "
", "<SP>", "", "", "", "", "", "", "", "", "", "<|emoji1|>", "<unk>", "<|bagoftoken|>", "<|endoftext|>"] # fmt: skip - emoji_tokens = {"emoji": {"\ud83d\ude00": "<|emoji1|>"}, "emoji_inv": {"<|emoji1|>": "\ud83d\ude00"}} # 😀 - self.special_tokens_map = {"unk_token": "<unk>"} - - self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"]) - self.emoji_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["emoji_file"]) - with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer: - vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) - with open(self.emoji_file, "w") as emoji_writer: - emoji_writer.write(json.dumps(emoji_tokens)) - - def get_tokenizer(self, **kwargs): - kwargs.update(self.special_tokens_map) - return GPTSanJapaneseTokenizer.from_pretrained(self.tmpdirname, **kwargs) - - # Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.get_input_output_texts - def get_input_output_texts(self, tokenizer): - input_text = "こんにちは、世界。 \nこんばんは、㔺界。😀" - output_text = "こんにちは、世界。 \nこんばんは、世界。😀" - return input_text, output_text - - # Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.get_clean_sequence - def get_clean_sequence(self, tokenizer): - input_text, output_text = self.get_input_output_texts(tokenizer) - ids = tokenizer.encode(output_text, add_special_tokens=False) - text = tokenizer.decode(ids, clean_up_tokenization_spaces=False) - return text, ids - - # Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.test_pretokenized_inputs - def test_pretokenized_inputs(self): - pass # TODO add if relevant - - # Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.test_maximum_encoding_length_pair_input - def test_maximum_encoding_length_pair_input(self): - pass # TODO add if relevant - - # Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.test_maximum_encoding_length_single_input - def test_maximum_encoding_length_single_input(self): - pass # TODO add if relevant - - # Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.test_full_tokenizer - def test_full_tokenizer(self): - tokenizer = self.get_tokenizer() - - # Testing tokenization - input_text = "こんにちは、世界。 こんばんは、㔺界。" - expected_token = ["こん", "にちは", "、", "世界", "。", "<SP>", "こん", "ばんは", "、", "㔺界", "。"] - tokens = tokenizer.tokenize(input_text) - self.assertListEqual(tokens, expected_token) - - # Testing conversion to ids without special tokens - expected_ids = [0, 2, 5, 4, 6, 8, 0, 3, 5, 4, 6] - input_ids = tokenizer.convert_tokens_to_ids(tokens) - self.assertListEqual(input_ids, expected_ids) - - # Testing conversion to ids with special tokens - input_tokens = tokens + [tokenizer.unk_token] - expected_ids = [0, 2, 5, 4, 6, 8, 0, 3, 5, 4, 6, 19] - input_ids = tokenizer.convert_tokens_to_ids(input_tokens) - self.assertListEqual(input_ids, expected_ids) - - def test_token_bagging(self): - tokenizer = self.get_tokenizer() - - # Testing tokenization - input_text = "こんにちは、<|bagoftoken|>世界。こんばんは、<|bagoftoken|>㔺界。" - expected_text = "こんにちは、、、、世界。こんばんは、、、、世界。" - tokens = tokenizer.encode(input_text) - output_text = tokenizer.decode(tokens) - self.assertEqual(output_text, expected_text) - - @slow - def test_prefix_input(self): - tokenizer =
self.tokenizer_class.from_pretrained("Tanrei/GPTSAN-japanese") - - # Testing tokenization - prefix_text = "こんにちは、世界。" - input_text = "こんばんは、㔺界。😀" - expected_text = "こんにちは、世界。こんばんは、世界。😀" - tokens_1 = tokenizer.encode(prefix_text + input_text) - tokens_2 = tokenizer.encode("", prefix_text=prefix_text + input_text) - tokens_3 = tokenizer.encode(input_text, prefix_text=prefix_text) - output_text_1 = tokenizer.decode(tokens_1) - output_text_2 = tokenizer.decode(tokens_2) - output_text_3 = tokenizer.decode(tokens_3) - self.assertEqual(output_text_1, expected_text) - self.assertEqual(output_text_2, expected_text) - self.assertEqual(output_text_3, expected_text) - - @slow - def test_token_type_ids(self): - tokenizer = self.tokenizer_class.from_pretrained("Tanrei/GPTSAN-japanese") - - # Testing tokenization - prefix_text = "こんにちは、世界。" - input_text = "こんばんは、㔺界。😀" - - len_prefix = len(tokenizer.encode(prefix_text)) - 2 - len_text = len(tokenizer.encode(input_text)) - 2 - - expected_mask_1 = [1] + [0] * (len_prefix + len_text + 1) - expected_mask_2 = [1] * (len_prefix + len_text + 1) + [0] - expected_mask_3 = [1] + [1] * (len_prefix) + [0] * (len_text + 1) - - type_id_1 = tokenizer(prefix_text + input_text).token_type_ids - type_id_2 = tokenizer("", prefix_text=prefix_text + input_text).token_type_ids - type_id_3 = tokenizer(input_text, prefix_text=prefix_text).token_type_ids - self.assertListEqual(type_id_1, expected_mask_1) - self.assertListEqual(type_id_2, expected_mask_2) - self.assertListEqual(type_id_3, expected_mask_3) - - @slow - def test_prefix_tokens(self): - tokenizer = self.tokenizer_class.from_pretrained("Tanrei/GPTSAN-japanese") - - x_token_1 = tokenizer.encode("あンいワ") - x_token_2 = tokenizer.encode("", prefix_text="あンいワ") - x_token_3 = tokenizer.encode("いワ", prefix_text="あン") - - self.assertEqual(tokenizer.decode(x_token_1), tokenizer.decode(x_token_2)) - self.assertEqual(tokenizer.decode(x_token_1), tokenizer.decode(x_token_3)) - self.assertNotEqual(x_token_1, x_token_2) - self.assertNotEqual(x_token_1, x_token_3) - self.assertEqual(x_token_1[1], x_token_2[-1]) # SEG token - self.assertEqual(x_token_1[1], x_token_3[3]) # SEG token - - @slow - def test_batch_encode(self): - tokenizer = self.tokenizer_class.from_pretrained("Tanrei/GPTSAN-japanese") - - input_pairs = [["武田信玄", "は、"], ["織田信長", "の配下の、"]] - x_token = tokenizer(input_pairs, padding=True) - x_token_2 = tokenizer.batch_encode_plus(input_pairs, padding=True) - - # fmt: off - expected_outputs = [[35993, 8640, 25948, 35998, 30647, 35675, 35999, 35999], [35993, 10382, 9868, 35998, 30646, 9459, 30646, 35675]] - expected_typeids = [[1, 1, 1, 0, 0, 0, 0, 0], [1, 1, 1, 0, 0, 0, 0, 0]] - expected_attmask = [[1, 1, 1, 1, 1, 1, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1]] - # fmt: on - self.assertListEqual(x_token.input_ids, expected_outputs) - self.assertListEqual(x_token.token_type_ids, expected_typeids) - self.assertListEqual(x_token.attention_mask, expected_attmask) - self.assertListEqual(x_token_2.input_ids, expected_outputs) - self.assertListEqual(x_token_2.token_type_ids, expected_typeids) - self.assertListEqual(x_token_2.attention_mask, expected_attmask) - - # Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.test_conversion_reversible - def test_conversion_reversible(self): - # Intentionally convert some words to accommodate character fluctuations unique to Japanese - pass - - # Copied from 
tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.test_padding_different_model_input_name - def test_padding_different_model_input_name(self): - # tokenizer has no padding token - pass - - @require_jinja - def test_tokenization_for_chat(self): - tokenizer = self.tokenizer_class.from_pretrained("Tanrei/GPTSAN-japanese") - # This is in English, but it's just here to make sure the chat control tokens are being added properly - test_chats = [ - [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}], - [ - {"role": "system", "content": "You are a helpful chatbot."}, - {"role": "user", "content": "Hello!"}, - {"role": "assistant", "content": "Nice to meet you."}, - ], - [{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}], - ] - tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats] - # fmt: off - expected_tokens = [ - [35993, 35998, 35637, 35659, 35665, 35716, 35645, 35662, 35649, 35716, 35645, 35716, 35652, 35649, 35656, 35660, 35650, 35665, 35656, 35716, 35647, 35652, 35645, 35664, 35646, 35659, 35664, 35595, 35716, 35999, 35993, 35998, 35620, 35649, 35656, 35656, 35659, 35582, 35716, 35999], - [35993, 35998, 35637, 35659, 35665, 35716, 35645, 35662, 35649, 35716, 35645, 35716, 35652, 35649, 35656, 35660, 35650, 35665, 35656, 35716, 35647, 35652, 35645, 35664, 35646, 35659, 35664, 35595, 35716, 35999, 35993, 35998, 35620, 35649, 35656, 35656, 35659, 35582, 35716, 35999, 35993, 35998, 35626, 35653, 35647, 35649, 35716, 35664, 35659, 35716, 35657, 35649, 35649, 35664, 35716, 35669, 35659, 35665, 35595, 35716, 35999], - [35993, 35998, 35626, 35653, 35647, 35649, 35716, 35664, 35659, 35716, 35657, 35649, 35649, 35664, 35716, 35669, 35659, 35665, 35595, 35716, 35999, 35993, 35998, 35620, 35649, 35656, 35656, 35659, 35582, 35716, 35999] - ] - # fmt: on - for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): - self.assertListEqual(tokenized_chat, expected_tokens) diff --git a/tests/models/graphormer/__init__.py b/tests/models/graphormer/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/graphormer/test_modeling_graphormer.py b/tests/models/graphormer/test_modeling_graphormer.py deleted file mode 100644 index 55b1ccc34a..0000000000 --- a/tests/models/graphormer/test_modeling_graphormer.py +++ /dev/null @@ -1,1300 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Testing suite for the PyTorch Graphormer model.""" - -import copy -import inspect -import os -import tempfile -import unittest - -from transformers import GraphormerConfig, is_torch_available -from transformers.testing_utils import require_torch, slow, torch_device - -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_tensor -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_torch_available(): - import torch - from torch import tensor - - from transformers import GraphormerForGraphClassification, GraphormerModel - - -class GraphormerModelTester: - def __init__( - self, - parent, - num_classes=1, - num_atoms=32 * 9, - num_edges=32 * 3, - num_in_degree=32, - num_out_degree=32, - num_spatial=32, - num_edge_dis=16, - multi_hop_max_dist=5, # sometimes is 20 - spatial_pos_max=32, - edge_type="multi_hop", - init_fn=None, - max_nodes=32, - share_input_output_embed=False, - num_hidden_layers=2, - embedding_dim=32, - ffn_embedding_dim=32, - num_attention_heads=4, - dropout=0.1, - attention_dropout=0.1, - activation_dropout=0.1, - layerdrop=0.0, - encoder_normalize_before=False, - pre_layernorm=False, - apply_graphormer_init=False, - activation_fn="gelu", - embed_scale=None, - freeze_embeddings=False, - num_trans_layers_to_freeze=0, - traceable=False, - q_noise=0.0, - qn_block_size=8, - kdim=None, - vdim=None, - bias=True, - self_attention=True, - batch_size=10, - graph_size=20, - is_training=True, - ): - self.parent = parent - self.num_classes = num_classes - self.num_labels = num_classes - self.num_atoms = num_atoms - self.num_in_degree = num_in_degree - self.num_out_degree = num_out_degree - self.num_edges = num_edges - self.num_spatial = num_spatial - self.num_edge_dis = num_edge_dis - self.edge_type = edge_type - self.multi_hop_max_dist = multi_hop_max_dist - self.spatial_pos_max = spatial_pos_max - self.max_nodes = max_nodes - self.num_hidden_layers = num_hidden_layers - self.embedding_dim = embedding_dim - self.hidden_size = embedding_dim - self.ffn_embedding_dim = ffn_embedding_dim - self.num_attention_heads = num_attention_heads - self.dropout = dropout - self.attention_dropout = attention_dropout - self.activation_dropout = activation_dropout - self.layerdrop = layerdrop - self.encoder_normalize_before = encoder_normalize_before - self.pre_layernorm = pre_layernorm - self.apply_graphormer_init = apply_graphormer_init - self.activation_fn = activation_fn - self.embed_scale = embed_scale - self.freeze_embeddings = freeze_embeddings - self.num_trans_layers_to_freeze = num_trans_layers_to_freeze - self.share_input_output_embed = share_input_output_embed - self.traceable = traceable - self.q_noise = q_noise - self.qn_block_size = qn_block_size - self.init_fn = init_fn - self.kdim = kdim - self.vdim = vdim - self.self_attention = self_attention - self.bias = bias - self.batch_size = batch_size - self.graph_size = graph_size - self.is_training = is_training - - def prepare_config_and_inputs(self): - attn_bias = ids_tensor( - [self.batch_size, self.graph_size + 1, self.graph_size + 1], self.num_atoms - ) # Def not sure here - attn_edge_type = ids_tensor([self.batch_size, self.graph_size, self.graph_size, 1], self.num_edges) - spatial_pos = ids_tensor([self.batch_size, self.graph_size, self.graph_size], self.num_spatial) - in_degree = ids_tensor([self.batch_size, self.graph_size], self.num_in_degree) - out_degree = ids_tensor([self.batch_size, self.graph_size], self.num_out_degree) - input_nodes = 
ids_tensor([self.batch_size, self.graph_size, 1], self.num_atoms) - input_edges = ids_tensor( - [self.batch_size, self.graph_size, self.graph_size, self.multi_hop_max_dist, 1], self.num_edges - ) - labels = ids_tensor([self.batch_size], self.num_classes) - - config = self.get_config() - - return config, attn_bias, attn_edge_type, spatial_pos, in_degree, out_degree, input_nodes, input_edges, labels - - def get_config(self): - return GraphormerConfig( - num_atoms=self.num_atoms, - num_in_degree=self.num_in_degree, - num_out_degree=self.num_out_degree, - num_edges=self.num_edges, - num_spatial=self.num_spatial, - num_edge_dis=self.num_edge_dis, - edge_type=self.edge_type, - multi_hop_max_dist=self.multi_hop_max_dist, - spatial_pos_max=self.spatial_pos_max, - max_nodes=self.max_nodes, - num_hidden_layers=self.num_hidden_layers, - embedding_dim=self.embedding_dim, - hidden_size=self.embedding_dim, - ffn_embedding_dim=self.ffn_embedding_dim, - num_attention_heads=self.num_attention_heads, - dropout=self.dropout, - attention_dropout=self.attention_dropout, - activation_dropout=self.activation_dropout, - layerdrop=self.layerdrop, - encoder_normalize_before=self.encoder_normalize_before, - pre_layernorm=self.pre_layernorm, - apply_graphormer_init=self.apply_graphormer_init, - activation_fn=self.activation_fn, - embed_scale=self.embed_scale, - freeze_embeddings=self.freeze_embeddings, - num_trans_layers_to_freeze=self.num_trans_layers_to_freeze, - share_input_output_embed=self.share_input_output_embed, - traceable=self.traceable, - q_noise=self.q_noise, - qn_block_size=self.qn_block_size, - init_fn=self.init_fn, - kdim=self.kdim, - vdim=self.vdim, - self_attention=self.self_attention, - bias=self.bias, - ) - - def create_and_check_model( - self, config, attn_bias, attn_edge_type, spatial_pos, in_degree, out_degree, input_nodes, input_edges, labels - ): - model = GraphormerModel(config=config) - model.to(torch_device) - model.eval() - result = model( - input_nodes=input_nodes, - attn_bias=attn_bias, - in_degree=in_degree, - out_degree=out_degree, - spatial_pos=spatial_pos, - input_edges=input_edges, - attn_edge_type=attn_edge_type, - labels=labels, - ) - self.parent.assertEqual( - result.last_hidden_state.shape, (self.batch_size, self.graph_size + 1, self.hidden_size) - ) - - def create_and_check_for_graph_classification( - self, config, attn_bias, attn_edge_type, spatial_pos, in_degree, out_degree, input_nodes, input_edges, labels - ): - model = GraphormerForGraphClassification(config) - model.to(torch_device) - model.eval() - result = model( - input_nodes=input_nodes, - attn_bias=attn_bias, - in_degree=in_degree, - out_degree=out_degree, - spatial_pos=spatial_pos, - input_edges=input_edges, - attn_edge_type=attn_edge_type, - labels=labels, - ) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels)) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - attn_bias, - attn_edge_type, - spatial_pos, - in_degree, - out_degree, - input_nodes, - input_edges, - labels, - ) = config_and_inputs - inputs_dict = { - "attn_bias": attn_bias, - "attn_edge_type": attn_edge_type, - "spatial_pos": spatial_pos, - "in_degree": in_degree, - "out_degree": out_degree, - "input_nodes": input_nodes, - "input_edges": input_edges, - "labels": labels, - } - return config, inputs_dict - - -@require_torch -class GraphormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): - all_model_classes = 
(GraphormerForGraphClassification, GraphormerModel) if is_torch_available() else () - all_generative_model_classes = () - pipeline_model_mapping = {"feature-extraction": GraphormerModel} if is_torch_available() else {} - test_pruning = False - test_head_masking = False - test_resize_embeddings = False - main_input_name_nodes = "input_nodes" - main_input_name_edges = "input_edges" - has_attentions = False # does not output attention - - def setUp(self): - self.model_tester = GraphormerModelTester(self) - self.config_tester = ConfigTester(self, config_class=GraphormerConfig, has_text_modality=False) - - # overwrite from common as `Graphormer` requires more input arguments - def _create_and_check_torchscript(self, config, inputs_dict): - if not self.test_torchscript: - return - - configs_no_init = _config_zero_init(config) # To be sure we have no Nan - configs_no_init.torchscript = True - for model_class in self.all_model_classes: - model = model_class(config=configs_no_init) - model.to(torch_device) - model.eval() - inputs = self._prepare_for_class(inputs_dict, model_class) - - try: - required_keys = ( - "input_nodes", - "input_edges", - "attn_bias", - "in_degree", - "out_degree", - "spatial_pos", - "attn_edge_type", - ) - required_inputs = tuple(inputs[k] for k in required_keys) - model(*required_inputs) - traced_model = torch.jit.trace(model, required_inputs) - except RuntimeError: - self.fail("Couldn't trace module.") - - with tempfile.TemporaryDirectory() as tmp_dir_name: - pt_file_name = os.path.join(tmp_dir_name, "traced_model.pt") - - try: - torch.jit.save(traced_model, pt_file_name) - except Exception: - self.fail("Couldn't save module.") - - try: - loaded_model = torch.jit.load(pt_file_name) - except Exception: - self.fail("Couldn't load module.") - - model.to(torch_device) - model.eval() - - loaded_model.to(torch_device) - loaded_model.eval() - - model_state_dict = model.state_dict() - loaded_model_state_dict = loaded_model.state_dict() - - non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): - non_persistent_buffers[key] = loaded_model_state_dict[key] - - loaded_model_state_dict = { - key: value for key, value in loaded_model_state_dict.items() if key not in non_persistent_buffers - } - - self.assertEqual(set(model_state_dict.keys()), set(loaded_model_state_dict.keys())) - - model_buffers = list(model.buffers()) - for non_persistent_buffer in non_persistent_buffers.values(): - found_buffer = False - for i, model_buffer in enumerate(model_buffers): - if torch.equal(non_persistent_buffer, model_buffer): - found_buffer = True - break - - self.assertTrue(found_buffer) - model_buffers.pop(i) - - model_buffers = list(model.buffers()) - for non_persistent_buffer in non_persistent_buffers.values(): - found_buffer = False - for i, model_buffer in enumerate(model_buffers): - if torch.equal(non_persistent_buffer, model_buffer): - found_buffer = True - break - - self.assertTrue(found_buffer) - model_buffers.pop(i) - - models_equal = True - for layer_name, p1 in model_state_dict.items(): - if layer_name in loaded_model_state_dict: - p2 = loaded_model_state_dict[layer_name] - if p1.data.ne(p2.data).sum() > 0: - models_equal = False - - self.assertTrue(models_equal) - - # Avoid memory leak. Without this, each call increase RAM usage by ~20MB. 
- # (Even with this call, there are still memory leak by ~0.04MB) - self.clear_torch_jit_class_registry() - - def test_config(self): - self.config_tester.run_common_tests() - - @unittest.skip(reason="Graphormer does not use one single inputs_embedding but three") - def test_inputs_embeds(self): - pass - - @unittest.skip(reason="Graphormer does not implement feed forward chunking") - def test_feed_forward_chunking(self): - pass - - @unittest.skip(reason="Graphormer does not share input and output embeddings") - def test_model_common_attributes(self): - pass - - def test_initialization(self): - def _config_zero_init(config): - configs_no_init = copy.deepcopy(config) - for key in configs_no_init.__dict__.keys(): - if "_range" in key or "_std" in key or "initializer_factor" in key or "layer_scale" in key: - setattr(configs_no_init, key, 1e-10) - return configs_no_init - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - configs_no_init = _config_zero_init(config) - for model_class in self.all_model_classes: - model = model_class(config=configs_no_init) - for name, param in model.named_parameters(): - if param.requires_grad: - self.assertTrue( - -1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0, - msg=f"Parameter {name} of model {model_class} seems not properly initialized", - ) - - def test_hidden_states_output(self): - def check_hidden_states_output(inputs_dict, config, model_class): - model = model_class(config) - model.to(torch_device) - model.eval() - - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states - - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 - ) - self.assertEqual(len(hidden_states), expected_num_layers) - - batch_size = self.model_tester.batch_size - - self.assertListEqual( - list(hidden_states[0].shape[-2:]), - [batch_size, self.model_tester.hidden_size], - ) - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - # Always returns hidden_states - check_hidden_states_output(inputs_dict, config, model_class) - - def test_retain_grad_hidden_states_attentions(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.output_hidden_states = True - config.output_attentions = False - - # no need to test all models as different heads yield the same functionality - model_class = self.all_model_classes[0] - model = model_class(config) - model.to(torch_device) - - outputs = model(**inputs_dict) - output = outputs[0] - - hidden_states = outputs.hidden_states[0] - hidden_states.retain_grad() - - output.flatten()[0].backward(retain_graph=True) - - self.assertIsNotNone(hidden_states.grad) - - # Inputs are 'input_nodes' and 'input_edges' not 'input_ids' - def test_model_main_input_name(self): - for model_class in self.all_model_classes: - model_signature = inspect.signature(getattr(model_class, "forward")) - # The main input is the name of the argument after `self` - observed_main_input_name_nodes = list(model_signature.parameters.keys())[1] - observed_main_input_name_edges = list(model_signature.parameters.keys())[2] - self.assertEqual(model_class.main_input_name_nodes, observed_main_input_name_nodes) - self.assertEqual(model_class.main_input_name_edges, observed_main_input_name_edges) - - def 
test_forward_signature(self): - config, _ = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - model = model_class(config) - signature = inspect.signature(model.forward) - # signature.parameters is an OrderedDict => so arg_names order is deterministic - arg_names = [*signature.parameters.keys()] - - expected_arg_names = ["input_nodes", "input_edges"] - self.assertListEqual(arg_names[:2], expected_arg_names) - - def test_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_model(*config_and_inputs) - - def test_for_graph_classification(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_graph_classification(*config_and_inputs) - - @slow - def test_model_from_pretrained(self): - model_name = "clefourrier/graphormer-base-pcqm4mv1" - model = GraphormerForGraphClassification.from_pretrained(model_name) - self.assertIsNotNone(model) - - -@require_torch -class GraphormerModelIntegrationTest(unittest.TestCase): - @slow - def test_inference_graph_classification(self): - model = GraphormerForGraphClassification.from_pretrained("clefourrier/graphormer-base-pcqm4mv2") - - # Actual real graph data from the MUTAG dataset - # fmt: off - model_input = { - "attn_bias": tensor( - [ - [ - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - ], - [ - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, float("-inf"), float("-inf"), float("-inf"), float("-inf")], - ], - ] - ), - "attn_edge_type": tensor( - [ - [ - [[0], [3], [0], [0], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[3], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [3], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [3], [0], [3], [0], [0], [0], [0], [3], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [3], [0], [3], [3], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[3], [0], [0], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [3], [0], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [3], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [3], [0], [3], [0], [0], [0], [3], [0], [0], [0]], - [[0], [0], [0], [3], [0], [0], [0], [0], [3], [0], [3], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [0], [0], [3], [0], [3], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [3], [0], [3], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [3], [0], [3], [3], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [0], [3], [0], [0], [0], [3], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [3], [0], [0], [3], [3]], - [[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [3], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [3], [0], [0]], - ], - [ - [[0], [3], [0], [0], [0], [0], [0], [0], 
[0], [3], [0], [0], [0], [0], [0], [0], [0]], - [[3], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [3], [0], [3], [0], [0], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [3], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [3], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [3], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [3], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [3], [0], [0], [0], [3], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [3], [0], [3], [3], [0], [0], [0], [0], [0], [0]], - [[3], [0], [0], [0], [0], [0], [0], [0], [3], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [0], [3], [0], [0], [3], [3], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [3], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [3], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], - ], - ] - ), - # fmt: on - "spatial_pos": tensor( - [ - [ - [1, 2, 3, 4, 3, 2, 4, 5, 6, 5, 6, 7, 8, 7, 9, 10, 10], - [2, 1, 2, 3, 4, 3, 5, 6, 5, 4, 5, 6, 7, 6, 8, 9, 9], - [3, 2, 1, 2, 3, 4, 4, 5, 4, 3, 4, 5, 6, 5, 7, 8, 8], - [4, 3, 2, 1, 2, 3, 3, 4, 3, 2, 3, 4, 5, 4, 6, 7, 7], - [3, 4, 3, 2, 1, 2, 2, 3, 4, 3, 4, 5, 6, 5, 7, 8, 8], - [2, 3, 4, 3, 2, 1, 3, 4, 5, 4, 5, 6, 7, 6, 8, 9, 9], - [4, 5, 4, 3, 2, 3, 1, 2, 3, 4, 5, 6, 5, 4, 6, 7, 7], - [5, 6, 5, 4, 3, 4, 2, 1, 2, 3, 4, 5, 4, 3, 5, 6, 6], - [6, 5, 4, 3, 4, 5, 3, 2, 1, 2, 3, 4, 3, 2, 4, 5, 5], - [5, 4, 3, 2, 3, 4, 4, 3, 2, 1, 2, 3, 4, 3, 5, 6, 6], - [6, 5, 4, 3, 4, 5, 5, 4, 3, 2, 1, 2, 3, 4, 4, 5, 5], - [7, 6, 5, 4, 5, 6, 6, 5, 4, 3, 2, 1, 2, 3, 3, 4, 4], - [8, 7, 6, 5, 6, 7, 5, 4, 3, 4, 3, 2, 1, 2, 2, 3, 3], - [7, 6, 5, 4, 5, 6, 4, 3, 2, 3, 4, 3, 2, 1, 3, 4, 4], - [9, 8, 7, 6, 7, 8, 6, 5, 4, 5, 4, 3, 2, 3, 1, 2, 2], - [10, 9, 8, 7, 8, 9, 7, 6, 5, 6, 5, 4, 3, 4, 2, 1, 3], - [10, 9, 8, 7, 8, 9, 7, 6, 5, 6, 5, 4, 3, 4, 2, 3, 1], - ], - [ - [1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 4, 5, 5, 0, 0, 0, 0], - [2, 1, 2, 3, 4, 5, 4, 3, 4, 3, 5, 6, 6, 0, 0, 0, 0], - [3, 2, 1, 2, 3, 4, 3, 2, 3, 4, 4, 5, 5, 0, 0, 0, 0], - [4, 3, 2, 1, 2, 3, 4, 3, 4, 5, 5, 6, 6, 0, 0, 0, 0], - [5, 4, 3, 2, 1, 2, 3, 4, 5, 6, 6, 7, 7, 0, 0, 0, 0], - [6, 5, 4, 3, 2, 1, 2, 3, 4, 5, 5, 6, 6, 0, 0, 0, 0], - [5, 4, 3, 4, 3, 2, 1, 2, 3, 4, 4, 5, 5, 0, 0, 0, 0], - [4, 3, 2, 3, 4, 3, 2, 1, 2, 3, 3, 4, 4, 0, 0, 0, 0], - [3, 4, 3, 4, 5, 4, 3, 2, 1, 2, 2, 3, 3, 0, 0, 0, 0], - [2, 3, 4, 5, 6, 5, 4, 3, 2, 1, 3, 4, 4, 0, 0, 0, 0], - [4, 5, 4, 5, 6, 5, 4, 3, 2, 3, 1, 2, 2, 0, 0, 0, 0], - [5, 6, 5, 6, 7, 6, 5, 4, 3, 4, 2, 1, 3, 0, 0, 0, 0], - [5, 6, 5, 6, 7, 6, 5, 4, 3, 4, 2, 3, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ], - ] - ), - "in_degree": tensor( - [ - [3, 3, 3, 4, 4, 3, 3, 3, 4, 4, 3, 3, 4, 3, 4, 2, 2], - [3, 3, 4, 3, 3, 3, 3, 4, 4, 3, 4, 2, 2, 0, 
0, 0, 0], - ] - ), - "out_degree": tensor( - [ - [3, 3, 3, 4, 4, 3, 3, 3, 4, 4, 3, 3, 4, 3, 4, 2, 2], - [3, 3, 4, 3, 3, 3, 3, 4, 4, 3, 4, 2, 2, 0, 0, 0, 0], - ] - ), - "input_nodes": tensor( - [ - [[3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3]], - [[3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [0], [0], [0], [0]], - ] - ), - "input_edges": tensor( - [ - [ - [ - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - ], - [ - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - ], - [ - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - ], - [ - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - ], - [ - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - ], - [ - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - ], - [ - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], 
[0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - ], - [ - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - ], - [ - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [0]], - ], - [ - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - ], - [ - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [0]], - ], - [ - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [0], [0]], - ], - [ - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - ], - [ - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], 
[4], [4]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [0], [0]], - ], - [ - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - ], - [ - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - ], - [ - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - ], - [ - [ - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], 
[0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], 
- [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [4]], - [[4], [4], [4], [4], [0]], - [[4], [4], [4], [0], [0]], - [[4], [4], [0], [0], [0]], - [[4], [4], [4], [0], [0]], - [[4], [0], [0], [0], [0]], - [[4], [4], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - [ - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - [[0], [0], [0], [0], [0]], - ], - ], - ] - ), - "labels": tensor([1, 0]), - } - - output = model(**model_input)["logits"] - - expected_shape = torch.Size((2, 1)) - self.assertEqual(output.shape, expected_shape) - - expected_logs = torch.tensor( - [[7.6060], [7.4126]] - ) - - 
self.assertTrue(torch.allclose(output, expected_logs, atol=1e-4)) diff --git a/tests/models/jukebox/__init__.py b/tests/models/jukebox/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/jukebox/test_modeling_jukebox.py b/tests/models/jukebox/test_modeling_jukebox.py deleted file mode 100644 index f064f442fc..0000000000 --- a/tests/models/jukebox/test_modeling_jukebox.py +++ /dev/null @@ -1,407 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import unittest -from unittest import skip - -from transformers import is_torch_available -from transformers.testing_utils import ( - require_torch, - require_torch_accelerator, - require_torch_fp16, - slow, - torch_device, -) -from transformers.trainer_utils import set_seed - - -if is_torch_available(): - import torch - - from transformers import JukeboxModel, JukeboxPrior, JukeboxTokenizer - - -@require_torch -class Jukebox1bModelTester(unittest.TestCase): - all_model_classes = (JukeboxModel,) if is_torch_available() else () - model_id = "openai/jukebox-1b-lyrics" - metas = { - "artist": "Zac Brown Band", - "genres": "Country", - "lyrics": """I met a traveller from an antique land, - Who said "Two vast and trunkless legs of stone - Stand in the desert. . . . Near them, on the sand, - Half sunk a shattered visage lies, whose frown, - And wrinkled lip, and sneer of cold command, - Tell that its sculptor well those passions read - Which yet survive, stamped on these lifeless things, - The hand that mocked them, and the heart that fed; - And on the pedestal, these words appear: - My name is Ozymandias, King of Kings; - Look on my Works, ye Mighty, and despair! - Nothing beside remains. 
Round the decay - Of that colossal Wreck, boundless and bare - The lone and level sands stretch far away - """, - } - # fmt: off - EXPECTED_OUTPUT_2 = [ - 1864, 1536, 1213, 1870, 1357, 1536, 519, 880, 1323, 789, 1082, 534, - 1000, 1445, 1105, 1130, 967, 515, 1434, 1620, 534, 1495, 283, 1445, - 333, 1307, 539, 1631, 1528, 375, 1434, 673, 627, 710, 778, 1883, - 1405, 1276, 1455, 1228 - ] - - EXPECTED_OUTPUT_2_PT_2 = [ - 1489, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653 - ] - - EXPECTED_OUTPUT_1 = [ - 1125, 1751, 697, 1776, 1141, 1476, 391, 697, 1125, 684, 867, 416, - 844, 1372, 1274, 717, 1274, 844, 1299, 1419, 697, 1370, 317, 1125, - 191, 1440, 1370, 1440, 1370, 282, 1621, 1370, 368, 349, 867, 1872, - 1262, 869, 1728, 747 - ] - EXPECTED_OUTPUT_1_PT_2 = [ - 416, 416, 1125, 1125, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416 - ] - - EXPECTED_OUTPUT_0 = [ - 1755, 842, 307, 1843, 1022, 1395, 234, 1554, 806, 739, 1022, 442, - 616, 556, 268, 1499, 933, 457, 1440, 1837, 755, 985, 308, 902, - 293, 1443, 1671, 1141, 1533, 555, 1562, 1061, 287, 417, 1022, 2008, - 1186, 1015, 1777, 268 - ] - EXPECTED_OUTPUT_0_PT_2 = [ - 854, 842, 1353, 114, 1353, 842, 185, 842, 185, 114, 591, 842, - 185, 417, 185, 842, 307, 842, 591, 842, 185, 842, 307, 842, - 591, 842, 1353, 842, 185, 842, 591, 842, 591, 114, 591, 842, - 185, 842, 591, 89 - ] - - EXPECTED_Y_COND = [1058304, 0, 786432, 7169, 507, 76, 27, 40, 30, 76] - - EXPECTED_PRIMED_0 = [ - 390, 1160, 1002, 1907, 1788, 1788, 1788, 1907, 1002, 1002, 1854, 1002, - 1002, 1002, 1002, 1002, 1002, 1160, 1160, 1606, 596, 596, 1160, 1002, - 1516, 596, 1002, 1002, 1002, 1907, 1788, 1788, 1788, 1854, 1788, 1907, - 1907, 1788, 596, 1626 - ] - EXPECTED_PRIMED_1 = [ - 1236, 1668, 1484, 1920, 1848, 1409, 139, 864, 1828, 1272, 1599, 824, - 1672, 139, 555, 1484, 824, 1920, 555, 596, 1579, 1599, 1231, 1599, - 1637, 1407, 212, 824, 1599, 116, 1433, 824, 258, 1599, 1433, 1895, - 1063, 1433, 1433, 1599 - ] - EXPECTED_PRIMED_2 = [ - 1684, 1873, 1119, 1189, 395, 611, 1901, 972, 890, 1337, 1392, 1927, - 96, 972, 672, 780, 1119, 890, 158, 771, 1073, 1927, 353, 1331, - 1269, 1459, 1333, 1645, 812, 1577, 1337, 606, 353, 981, 1466, 619, - 197, 391, 302, 1930 - ] - EXPECTED_VQVAE_ENCODE = [ - 390, 1160, 1002, 1907, 1788, 1788, 1788, 1907, 1002, 1002, 1854, 1002, - 1002, 1002, 1002, 1002, 1002, 1160, 1160, 1606, 596, 596, 1160, 1002, - 1516, 596, 1002, 1002, 1002, 1907, 1788, 1788, 1788, 1854, 1788, 1907, - 1907, 1788, 596, 1626 - ] - EXPECTED_VQVAE_DECODE = [ - -0.0492, -0.0524, -0.0565, -0.0640, -0.0686, -0.0684, -0.0677, -0.0664, - -0.0605, -0.0490, -0.0330, -0.0168, -0.0083, -0.0075, -0.0051, 0.0025, - 0.0136, 0.0261, 0.0386, 0.0497, 0.0580, 0.0599, 0.0583, 0.0614, - 0.0740, 0.0889, 0.1023, 0.1162, 0.1211, 0.1212, 0.1251, 0.1336, - 0.1502, 0.1686, 0.1883, 0.2148, 0.2363, 0.2458, 0.2507, 0.2531 - ] - EXPECTED_AUDIO_COND = [ - 0.0256, -0.0544, 0.1600, -0.0032, 0.1066, 0.0825, -0.0013, 0.3440, - 0.0210, 0.0412, -0.1777, -0.0892, -0.0164, 0.0285, -0.0613, -0.0617, - -0.0137, -0.0201, -0.0175, 0.0215, -0.0627, 0.0520, -0.0730, 0.0970, - -0.0100, 0.0442, -0.0586, 0.0207, -0.0015, -0.0082 - ] - EXPECTED_META_COND = [ - 0.0415, 0.0877, 0.0022, -0.0055, 0.0751, 0.0334, 0.0324, -0.0068, - 0.0011, 0.0017, 
-0.0676, 0.0655, -0.0143, 0.0399, 0.0303, 0.0743, - -0.0168, -0.0394, -0.1113, 0.0124, 0.0442, 0.0267, -0.0003, -0.1536, - -0.0116, -0.1837, -0.0180, -0.1026, -0.0777, -0.0456 - ] - EXPECTED_LYRIC_COND = [ - 76, 27, 40, 30, 76, 46, 44, 47, 40, 37, 38, 31, 45, 45, 76, 38, 31, 33, - 45, 76, 41, 32, 76, 45, 46, 41, 40, 31, 78, 76 - ] - # fmt: on - - def prepare_inputs(self): - tokenizer = JukeboxTokenizer.from_pretrained(self.model_id) - tokens = tokenizer(**self.metas)["input_ids"] - return tokens - - @slow - def test_sampling(self): - model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval() - labels = self.prepare_inputs() - - set_seed(0) - zs = [torch.zeros(1, 0, dtype=torch.long).cpu() for _ in range(3)] - zs = model._sample(zs, labels, [0], sample_length=40 * model.priors[0].raw_to_tokens, save_results=False) - self.assertIn(zs[0][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_2, self.EXPECTED_OUTPUT_2_PT_2]) - - set_seed(0) - zs = model._sample(zs, labels, [1], sample_length=40 * model.priors[1].raw_to_tokens, save_results=False) - self.assertIn(zs[1][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_1, self.EXPECTED_OUTPUT_1_PT_2]) - - set_seed(0) - zs = model._sample(zs, labels, [2], sample_length=40 * model.priors[2].raw_to_tokens, save_results=False) - self.assertIn(zs[2][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_0, self.EXPECTED_OUTPUT_0_PT_2]) - - @slow - def test_conditioning(self): - torch.backends.cuda.matmul.allow_tf32 = False - torch.backends.cudnn.allow_tf32 = False - model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval() - - labels = self.prepare_inputs() - set_seed(0) - zs = [torch.zeros(1, 0, dtype=torch.long) for _ in range(3)] - - top_prior = model.priors[0] - start = 0 - music_token_conds = top_prior.get_music_tokens_conds(zs, start=start, end=start + top_prior.n_ctx) - metadata = top_prior.get_metadata(labels[0].clone(), start, 1058304, 0) - - self.assertIsNone(music_token_conds) - self.assertListEqual(metadata.numpy()[0][:10].tolist(), self.EXPECTED_Y_COND) - - audio_conditioning, metadata_conditioning, lyric_tokens = top_prior.get_cond(music_token_conds, metadata) - torch.testing.assert_close( - audio_conditioning[0][0][:30].detach(), torch.tensor(self.EXPECTED_AUDIO_COND), atol=1e-4, rtol=1e-4 - ) - torch.testing.assert_close( - metadata_conditioning[0][0][:30].detach(), torch.tensor(self.EXPECTED_META_COND), atol=1e-4, rtol=1e-4 - ) - torch.testing.assert_close( - lyric_tokens[0, :30].detach(), torch.tensor(self.EXPECTED_LYRIC_COND), atol=1e-4, rtol=1e-4 - ) - - @slow - def test_primed_sampling(self): - torch.backends.cuda.matmul.allow_tf32 = False - torch.backends.cudnn.allow_tf32 = False - - model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval() - set_seed(0) - waveform = torch.rand((1, 5120, 1)) - tokens = list(self.prepare_inputs()) - - zs = [model.vqvae.encode(waveform, start_level=2, bs_chunks=waveform.shape[0])[0], None, None] - zs = model._sample( - zs, tokens, sample_levels=[0], save_results=False, sample_length=40 * model.priors[0].raw_to_tokens - ) - torch.testing.assert_close(zs[0][0][:40], torch.tensor(self.EXPECTED_PRIMED_0)) - - upper_2 = torch.cat((zs[0], torch.zeros(1, 2048 - zs[0].shape[-1])), dim=-1).long() - zs = [upper_2, model.vqvae.encode(waveform, start_level=1, bs_chunks=waveform.shape[0])[0], None] - zs = model._sample( - zs, tokens, sample_levels=[1], save_results=False, sample_length=40 * model.priors[1].raw_to_tokens - ) - torch.testing.assert_close(zs[1][0][:40], 
torch.tensor(self.EXPECTED_PRIMED_1)) - - upper_1 = torch.cat((zs[1], torch.zeros(1, 2048 - zs[1].shape[-1])), dim=-1).long() - zs = [upper_2, upper_1, model.vqvae.encode(waveform, start_level=0, bs_chunks=waveform.shape[0])[0]] - zs = model._sample( - zs, tokens, sample_levels=[2], save_results=False, sample_length=40 * model.priors[2].raw_to_tokens - ) - torch.testing.assert_close(zs[2][0][:40].cpu(), torch.tensor(self.EXPECTED_PRIMED_2)) - - @slow - def test_vqvae(self): - model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval() - set_seed(0) - x = torch.rand((1, 5120, 1)) - with torch.no_grad(): - zs = model.vqvae.encode(x, start_level=2, bs_chunks=x.shape[0]) - torch.testing.assert_close(zs[0][0], torch.tensor(self.EXPECTED_VQVAE_ENCODE)) - - with torch.no_grad(): - x = model.vqvae.decode(zs, start_level=2, bs_chunks=x.shape[0]) - torch.testing.assert_close(x[0, :40, 0], torch.tensor(self.EXPECTED_VQVAE_DECODE), atol=1e-4, rtol=1e-4) - - -@require_torch -class Jukebox5bModelTester(unittest.TestCase): - all_model_classes = (JukeboxModel,) if is_torch_available() else () - model_id = "openai/jukebox-5b-lyrics" - metas = { - "artist": "Zac Brown Band", - "genres": "Country", - "lyrics": """I met a traveller from an antique land, - Who said "Two vast and trunkless legs of stone - Stand in the desert. . . . Near them, on the sand, - Half sunk a shattered visage lies, whose frown, - And wrinkled lip, and sneer of cold command, - Tell that its sculptor well those passions read - Which yet survive, stamped on these lifeless things, - The hand that mocked them, and the heart that fed; - And on the pedestal, these words appear: - My name is Ozymandias, King of Kings; - Look on my Works, ye Mighty, and despair! - Nothing beside remains. 
Round the decay - Of that colossal Wreck, boundless and bare - The lone and level sands stretch far away - """, - } - - # fmt: off - EXPECTED_OUTPUT_2 = [ - 1489, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 1489, 1489, 1489, 1489, 1150, 1853, 1509, 1150, 1357, 1509, 6, 1272 - ] - EXPECTED_OUTPUT_2_PT_2 = [ - 1489, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653 - ] - - EXPECTED_OUTPUT_1 = [ - 1125, 416, 1125, 1125, 1125, 1125, 1125, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416 - ] - EXPECTED_OUTPUT_1_PT_2 = [ - 416, 416, 1125, 1125, 416, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 416, 416 - ] - - EXPECTED_OUTPUT_0 = [ - 1755, 1061, 234, 1755, 1061, 1755, 185, 290, 307, 307, 616, 616, - 616, 616, 616, 616, 307, 290, 417, 1755, 234, 1755, 185, 290, - 290, 290, 307, 616, 616, 616, 616, 616, 290, 234, 234, 1755, - 234, 234, 1755, 234, 185, 185, 307, 616, 616, 616, 616, 290, - 1755, 1755, 1755, 234, 234, 1755, 1572, 290, 307, 616, 34, 616 - ] - EXPECTED_OUTPUT_0_PT_2 = [ - 854, 842, 1353, 114, 1353, 842, 185, 842, 185, 114, 591, 842, 185, - 417, 185, 842, 307, 842, 591, 842, 185, 842, 185, 842, 591, 842, - 1353, 842, 185, 842, 591, 842, 591, 114, 591, 842, 185, 842, 591, - 89, 591, 842, 591, 842, 591, 417, 1372, 842, 1372, 842, 34, 842, - 185, 89, 591, 842, 185, 842, 591, 632 - ] - - EXPECTED_GPU_OUTPUTS_2 = [ - 1489, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653 - ] - EXPECTED_GPU_OUTPUTS_2_PT_2 = [ - 1489, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, - 653, 653, 653, 653, 653, 653, 653, 1853, 1177, 1536, 1228, - 710, 475, 1489, 1229, 1224, 231, 1224, 252, 1434, 653, 475, - 1106, 1877, 1599, 1228, 1600, 1683, 1182, 1853, 475, 1864, - 252, 1229, 1434, 2001 - ] - - EXPECTED_GPU_OUTPUTS_1 = [ - 1125, 1125, 416, 1125, 1125, 416, 1125, 1125, 416, 416, 1125, 416, - 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416 - ] - EXPECTED_GPU_OUTPUTS_0 = [ - 491, 1755, 34, 1613, 1755, 417, 992, 1613, 222, 842, 1353, 1613, - 844, 632, 185, 1613, 844, 632, 185, 1613, 185, 842, 677, 1613, - 185, 114, 1353, 1613, 307, 89, 844, 1613, 307, 1332, 234, 1979, - 307, 89, 1353, 616, 34, 842, 185, 842, 34, 842, 185, 
842, - 307, 114, 185, 89, 34, 1268, 185, 89, 34, 842, 185, 89 - ] - # fmt: on - - def prepare_inputs(self, model_id): - tokenizer = JukeboxTokenizer.from_pretrained(model_id) - tokens = tokenizer(**self.metas)["input_ids"] - return tokens - - @slow - def test_sampling(self): - model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval() - labels = self.prepare_inputs(self.model_id) - - set_seed(0) - zs = [torch.zeros(1, 0, dtype=torch.long).cpu() for _ in range(3)] - zs = model._sample(zs, labels, [0], sample_length=60 * model.priors[0].raw_to_tokens, save_results=False) - self.assertIn(zs[0][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_2, self.EXPECTED_OUTPUT_2_PT_2]) - - set_seed(0) - zs = model._sample(zs, labels, [1], sample_length=60 * model.priors[1].raw_to_tokens, save_results=False) - self.assertIn(zs[1][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_1, self.EXPECTED_OUTPUT_1_PT_2]) - - set_seed(0) - zs = model._sample(zs, labels, [2], sample_length=60 * model.priors[2].raw_to_tokens, save_results=False) - self.assertIn(zs[2][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_0, self.EXPECTED_OUTPUT_0_PT_2]) - - @slow - @require_torch_accelerator - @skip("Not enough GPU memory on CI runners") - def test_slow_sampling(self): - model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval() - labels = [i.to(torch_device) for i in self.prepare_inputs(self.model_id)] - - set_seed(0) - model.priors[0].to(torch_device) - zs = [torch.zeros(1, 0, dtype=torch.long).to(torch_device) for _ in range(3)] - zs = model._sample(zs, labels, [0], sample_length=60 * model.priors[0].raw_to_tokens, save_results=False) - torch.testing.assert_close(zs[0][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_2)) - model.priors[0].cpu() - - set_seed(0) - model.priors[1].to(torch_device) - zs = model._sample(zs, labels, [1], sample_length=60 * model.priors[1].raw_to_tokens, save_results=False) - torch.testing.assert_close(zs[1][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_1)) - model.priors[1].cpu() - - set_seed(0) - model.priors[2].to(torch_device) - zs = model._sample(zs, labels, [2], sample_length=60 * model.priors[2].raw_to_tokens, save_results=False) - torch.testing.assert_close(zs[2][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_0)) - - @slow - @require_torch_accelerator - @require_torch_fp16 - def test_fp16_slow_sampling(self): - prior_id = "ArthurZ/jukebox_prior_0" - model = JukeboxPrior.from_pretrained(prior_id, min_duration=0).eval().half().to(torch_device) - - labels = self.prepare_inputs(prior_id)[0].to(torch_device) - metadata = model.get_metadata(labels, 0, 7680, 0) - set_seed(0) - outputs = model.sample(1, metadata=metadata, sample_tokens=60) - self.assertIn(outputs[0].cpu().tolist(), [self.EXPECTED_GPU_OUTPUTS_2, self.EXPECTED_GPU_OUTPUTS_2_PT_2]) diff --git a/tests/models/jukebox/test_tokenization_jukebox.py b/tests/models/jukebox/test_tokenization_jukebox.py deleted file mode 100644 index c434cf6aa1..0000000000 --- a/tests/models/jukebox/test_tokenization_jukebox.py +++ /dev/null @@ -1,209 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -from transformers import JukeboxTokenizer -from transformers.testing_utils import require_torch - - -class JukeboxTokenizationTest(unittest.TestCase): - tokenizer_class = JukeboxTokenizer - metas = { - "artist": "Zac Brown Band", - "genres": "Country", - "lyrics": """I met a traveller from an antique land, - Who said "Two vast and trunkless legs of stone - Stand in the desert. . . . Near them, on the sand, - Half sunk a shattered visage lies, whose frown, - And wrinkled lip, and sneer of cold command, - Tell that its sculptor well those passions read - Which yet survive, stamped on these lifeless things, - The hand that mocked them, and the heart that fed; - And on the pedestal, these words appear: - My name is Ozymandias, King of Kings; - Look on my Works, ye Mighty, and despair! - Nothing beside remains. Round the decay - Of that colossal Wreck, boundless and bare - The lone and level sands stretch far away - """, - } - - @require_torch - def test_1b_lyrics_tokenizer(self): - """ - how to run the same test with openAI - ... - """ - import torch - - tokenizer = JukeboxTokenizer.from_pretrained("openai/jukebox-1b-lyrics") - tokens = tokenizer(**self.metas)["input_ids"] - # fmt: off - EXPECTED_OUTPUT = [ - torch.tensor([[ - 0, 0, 0, 7169, 507, 9, 76, 39, 31, 46, 76, 27, - 76, 46, 44, 27, 48, 31, 38, 38, 31, 44, 76, 32, - 44, 41, 39, 76, 27, 40, 76, 27, 40, 46, 35, 43, - 47, 31, 76, 38, 27, 40, 30, 64, 78, 76, 76, 76, - 76, 76, 76, 76, 76, 23, 34, 41, 76, 45, 27, 35, - 30, 76, 71, 20, 49, 41, 76, 48, 27, 45, 46, 76, - 27, 40, 30, 76, 46, 44, 47, 40, 37, 38, 31, 45, - 45, 76, 38, 31, 33, 45, 76, 41, 32, 76, 45, 46, - 41, 40, 31, 78, 76, 76, 76, 76, 76, 76, 76, 76, - 19, 46, 27, 40, 30, 76, 35, 40, 76, 46, 34, 31, - 76, 30, 31, 45, 31, 44, 46, 63, 76, 63, 76, 63, - 76, 63, 76, 14, 31, 27, 44, 76, 46, 34, 31, 39, - 64, 76, 41, 40, 76, 46, 34, 31, 76, 45, 27, 40, - 30, 64, 78, 76, 76, 76, 76, 76, 76, 76, 76, 8, - 27, 38, 32, 76, 45, 47, 40, 37, 76, 27, 76, 45, - 34, 27, 46, 46, 31, 44, 31, 30, 76, 48, 35, 45, - 27, 33, 31, 76, 38, 35, 31, 45, 64, 76, 49, 34, - 41, 45, 31, 76, 32, 44, 41, 49, 40, 64, 78, 76, - 76, 76, 76, 76, 76, 76, 76, 1, 40, 30, 76, 49, - 44, 35, 40, 37, 38, 31, 30, 76, 38, 35, 42, 64, - 76, 27, 40, 30, 76, 45, 40, 31, 31, 44, 76, 41, - 32, 76, 29, 41, 38, 30, 76, 29, 41, 39, 39, 27, - 40, 30, 64, 78, 76, 76, 76, 76, 76, 76, 76, 76, - 20, 31, 38, 38, 76, 46, 34, 27, 46, 76, 35, 46, - 45, 76, 45, 29, 47, 38, 42, 46, 41, 44, 76, 49, - 31, 38, 38, 76, 46, 34, 41, 45, 31, 76, 42, 27, - 45, 45, 35, 41, 40, 45, 76, 44, 31, 27, 30, 78, - 76, 76, 76, 76, 76, 76, 76, 76, 23, 34, 35, 29, - 34, 76, 51, 31, 46, 76, 45, 47, 44, 48, 35, 48, - 31, 64, 76, 45, 46, 27, 39, 42, 31, 30, 76, 41, - 40, 76, 46, 34, 31, 45, 31, 76, 38, 35, 32, 31, - 38, 31, 45, 45, 76, 46, 34, 35, 40, 33, 45, 64, - 78, 76, 76, 76, 76, 76, 76, 76, 76, 20, 34, 31, - 76, 34, 27, 40, 30, 76, 46, 34, 27, 46, 76, 39, - 41, 29, 37, 31, 30, 76, 46, 34, 31, 39, 64, 76, - 27, 40, 30, 76, 46, 34, 31, 76, 34, 31, 27, 44, - 46, 76, 46, 34, 27, 46, 76, 32, 31, 30, 66, 78, - 76, 76, 76, 76, 76, 76, 
76, 76, 1, 40, 30, 76, - 41, 40, 76, 46, 34, 31, 76, 42, 31, 30, 31, 45, - 46, 27, 38, 64, 76, 46, 34, 31, 45, 31, 76, 49, - 41, 44, 30, 45, 76, 27, 42, 42, 31, 27, 44, 65, - 78, 76, 76, 76, 76, 76, 76, 76, 76, 13, 51, 76, - 40, 27, 39, 31, 76, 35, 45, 76, 15, 52, 51, 39, - 27, 40, 30, 35, 27, 45, 64, 76, 11, 35, 40, 33, - 76, 41, 32, 76, 11, 35, 40, 33, 45, 66, 78, 76, - 76, 76, 76, 76, 76, 76, 76, 12, 41, 41, 37, 76, - 41, 40, 76, 39, 51, 76, 23, 41, 44, 37, 45, 64, - 76, 51, 31, 76, 13, 35, 33, 34, 46, 51, 64, 76, - 27, 40, 30, 76, 30, 31, 45, 42, 27, 35, 44, 67, - 78, 76, 76, 76, 76, 76, 76, 76, 76, 14, 41, 46, - 34, 35, 40, 33, 76, 28, 31, 45, 35, 30, 31, 76, - 44, 31, 39, 27, 35, 40, 45, 63, 76, 18, 41, 47, - 40, 30, 76, 46, 34, 31, 76, 30, 31, 29, 27, 51, - 78, 76, 76, 76, 76, 76, 76, 76, 76, 15, 32, 76, - 46, 34, 27, 46, 76, 29, 41, 38, 41, 45, 45, 27, - 38, 76, 23, 44, 31, 29, 37, 64, 76, 28, 41, 47, - 40, 30, 38, 31, 45, 45, 76, 27, 40, 30, 76, 28, - 27, 44, 31, 78, 76, 76, 76, 76, 76, 76, 76, 76, - 20, 34, 31, 76, 38, 41, 40, 31, 76, 27, 40, 30, - 76, 38, 31, 48, 31, 38, 76, 45, 27, 40, 30, 45, - 76, 45, 46, 44, 31, 46, 29, 34, 76, 32, 27, 44, - 76, 27, 49, 27, 51, 78, 76, 76, 76, 76, 76, 76, - 76, 76]]), - torch.tensor([[0, 0, 0, 1069, 11]]), - torch.tensor([[0, 0, 0, 1069, 11]]), - ] - # fmt: on - self.assertTrue(torch.allclose(tokens[0], EXPECTED_OUTPUT[0])) - self.assertTrue(torch.allclose(tokens[1], EXPECTED_OUTPUT[1])) - self.assertTrue(torch.allclose(tokens[2], EXPECTED_OUTPUT[2])) - - @require_torch - def test_5b_lyrics_tokenizer(self): - """ - The outputs are similar that open AI but do not have the same format as this one is adapted to the HF integration. - """ - import torch - - tokenizer = JukeboxTokenizer.from_pretrained("openai/jukebox-5b-lyrics") - tokens = tokenizer(**self.metas)["input_ids"] - # fmt: off - EXPECTED_OUTPUT = [ - torch.tensor([[ - 0, 0, 0, 1069, 11, -1, -1, -1, -1, 9, 77, 39, - 31, 46, 77, 27, 77, 46, 44, 27, 48, 31, 38, 38, - 31, 44, 77, 32, 44, 41, 39, 77, 27, 40, 77, 27, - 40, 46, 35, 43, 47, 31, 77, 38, 27, 40, 30, 64, - 79, 77, 77, 77, 77, 77, 77, 77, 77, 23, 34, 41, - 77, 45, 27, 35, 30, 77, 72, 20, 49, 41, 77, 48, - 27, 45, 46, 77, 27, 40, 30, 77, 46, 44, 47, 40, - 37, 38, 31, 45, 45, 77, 38, 31, 33, 45, 77, 41, - 32, 77, 45, 46, 41, 40, 31, 79, 77, 77, 77, 77, - 77, 77, 77, 77, 19, 46, 27, 40, 30, 77, 35, 40, - 77, 46, 34, 31, 77, 30, 31, 45, 31, 44, 46, 63, - 77, 63, 77, 63, 77, 63, 77, 14, 31, 27, 44, 77, - 46, 34, 31, 39, 64, 77, 41, 40, 77, 46, 34, 31, - 77, 45, 27, 40, 30, 64, 79, 77, 77, 77, 77, 77, - 77, 77, 77, 8, 27, 38, 32, 77, 45, 47, 40, 37, - 77, 27, 77, 45, 34, 27, 46, 46, 31, 44, 31, 30, - 77, 48, 35, 45, 27, 33, 31, 77, 38, 35, 31, 45, - 64, 77, 49, 34, 41, 45, 31, 77, 32, 44, 41, 49, - 40, 64, 79, 77, 77, 77, 77, 77, 77, 77, 77, 1, - 40, 30, 77, 49, 44, 35, 40, 37, 38, 31, 30, 77, - 38, 35, 42, 64, 77, 27, 40, 30, 77, 45, 40, 31, - 31, 44, 77, 41, 32, 77, 29, 41, 38, 30, 77, 29, - 41, 39, 39, 27, 40, 30, 64, 79, 77, 77, 77, 77, - 77, 77, 77, 77, 20, 31, 38, 38, 77, 46, 34, 27, - 46, 77, 35, 46, 45, 77, 45, 29, 47, 38, 42, 46, - 41, 44, 77, 49, 31, 38, 38, 77, 46, 34, 41, 45, - 31, 77, 42, 27, 45, 45, 35, 41, 40, 45, 77, 44, - 31, 27, 30, 79, 77, 77, 77, 77, 77, 77, 77, 77, - 23, 34, 35, 29, 34, 77, 51, 31, 46, 77, 45, 47, - 44, 48, 35, 48, 31, 64, 77, 45, 46, 27, 39, 42, - 31, 30, 77, 41, 40, 77, 46, 34, 31, 45, 31, 77, - 38, 35, 32, 31, 38, 31, 45, 45, 77, 46, 34, 35, - 40, 33, 45, 64, 79, 77, 77, 77, 77, 77, 77, 77, - 
77, 20, 34, 31, 77, 34, 27, 40, 30, 77, 46, 34, - 27, 46, 77, 39, 41, 29, 37, 31, 30, 77, 46, 34, - 31, 39, 64, 77, 27, 40, 30, 77, 46, 34, 31, 77, - 34, 31, 27, 44, 46, 77, 46, 34, 27, 46, 77, 32, - 31, 30, 66, 79, 77, 77, 77, 77, 77, 77, 77, 77, - 1, 40, 30, 77, 41, 40, 77, 46, 34, 31, 77, 42, - 31, 30, 31, 45, 46, 27, 38, 64, 77, 46, 34, 31, - 45, 31, 77, 49, 41, 44, 30, 45, 77, 27, 42, 42, - 31, 27, 44, 65, 79, 77, 77, 77, 77, 77, 77, 77, - 77, 13, 51, 77, 40, 27, 39, 31, 77, 35, 45, 77, - 15, 52, 51, 39, 27, 40, 30, 35, 27, 45, 64, 77, - 11, 35, 40, 33, 77, 41, 32, 77, 11, 35, 40, 33, - 45, 66, 79, 77, 77, 77, 77, 77, 77, 77, 77, 12, - 41, 41, 37, 77, 41, 40, 77, 39, 51, 77, 23, 41, - 44, 37, 45, 64, 77, 51, 31, 77, 13, 35, 33, 34, - 46, 51, 64, 77, 27, 40, 30, 77, 30, 31, 45, 42, - 27, 35, 44, 67, 79, 77, 77, 77, 77, 77, 77, 77, - 77, 14, 41, 46, 34, 35, 40, 33, 77, 28, 31, 45, - 35, 30, 31, 77, 44, 31, 39, 27, 35, 40, 45, 63, - 77, 18, 41, 47, 40, 30, 77, 46, 34, 31, 77, 30, - 31, 29, 27, 51, 79, 77, 77, 77, 77, 77, 77, 77, - 77, 15, 32, 77, 46, 34, 27, 46, 77, 29, 41, 38, - 41, 45, 45, 27, 38, 77, 23, 44, 31, 29, 37, 64, - 77, 28, 41, 47, 40, 30, 38, 31, 45, 45, 77, 27, - 40, 30, 77, 28, 27, 44, 31, 79, 77, 77, 77, 77, - 77, 77, 77, 77, 20, 34, 31, 77, 38, 41, 40, 31, - 77, 27, 40, 30, 77, 38, 31, 48, 31, 38, 77, 45, - 27, 40, 30, 45, 77, 45, 46, 44, 31, 46, 29, 34, - 77, 32, 27, 44, 77, 27, 49, 27, 51, 79, 77, 77, - 77, 77, 77, 77, 77, 77]]), - torch.tensor([[0, 0, 0, 1069, 11, -1, -1, -1, -1]]), - torch.tensor([[0, 0, 0, 1069, 11, -1, -1, -1, -1]]), - ] - # fmt: on - self.assertTrue(torch.allclose(tokens[0], EXPECTED_OUTPUT[0])) - self.assertTrue(torch.allclose(tokens[1], EXPECTED_OUTPUT[1])) - self.assertTrue(torch.allclose(tokens[2], EXPECTED_OUTPUT[2])) diff --git a/tests/models/mega/__init__.py b/tests/models/mega/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/mega/test_modeling_mega.py b/tests/models/mega/test_modeling_mega.py deleted file mode 100644 index 872f0a38af..0000000000 --- a/tests/models/mega/test_modeling_mega.py +++ /dev/null @@ -1,744 +0,0 @@ -# coding=utf-8 -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import unittest - -from transformers import MegaConfig, is_torch_available -from transformers.testing_utils import ( - TestCasePlus, - is_flaky, - require_torch, - require_torch_fp16, - slow, - torch_device, -) - -from ...generation.test_utils import GenerationTesterMixin -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_torch_available(): - import torch - - from transformers import ( - MegaForCausalLM, - MegaForMaskedLM, - MegaForMultipleChoice, - MegaForQuestionAnswering, - MegaForSequenceClassification, - MegaForTokenClassification, - MegaModel, - ) - - -class MegaModelTester: - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=2, - intermediate_size=37, - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_positions=1024, - bidirectional=False, # needed for decoding, and can't modify common generation tests; test separately by overriding - ema_projection_size=16, - shared_representation_size=64, - use_chunking=False, - chunk_size=32, - attention_activation="softmax", - use_normalized_ffn=True, - nffn_hidden_size=24, - add_token_type_embeddings=True, - type_vocab_size=2, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.add_token_type_embeddings = add_token_type_embeddings - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.intermediate_size = intermediate_size - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_positions = max_positions - self.bidirectional = bidirectional - self.ema_projection_size = ema_projection_size - self.shared_representation_size = shared_representation_size - self.use_chunking = use_chunking - self.chunk_size = chunk_size - self.attention_activation = attention_activation - self.use_normalized_ffn = use_normalized_ffn - self.nffn_hidden_size = nffn_hidden_size - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - self.num_attention_heads = 1 - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = random_attention_mask([self.batch_size, self.seq_length]) - - token_type_ids = None - if self.add_token_type_embeddings: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = self.get_config() - - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - - def 
get_config(self): - return MegaConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - intermediate_size=self.intermediate_size, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - type_vocab_size=self.type_vocab_size, - initializer_range=self.initializer_range, - # added args - add_token_type_embeddings=self.add_token_type_embeddings, - max_positions=self.max_positions, - bidirectional=self.bidirectional, - ema_projection_size=self.ema_projection_size, - shared_representation_size=self.shared_representation_size, - use_chunking=self.use_chunking, - chunk_size=self.chunk_size, - attention_activation=self.attention_activation, - use_normalized_ffn=self.use_normalized_ffn, - nffn_hidden_size=self.nffn_hidden_size, - ) - - def get_pipeline_config(self): - config = self.get_config() - config.vocab_size = 300 - return config - - def prepare_config_and_inputs_for_decoder(self): - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = self.prepare_config_and_inputs() - - config.is_decoder = True - config.bidirectional = False - encoder_hidden_states = floats_tensor([self.batch_size, self.seq_length, self.hidden_size]) - encoder_attention_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - return ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ) - - def create_and_check_model( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = MegaModel(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - result = model(input_ids, token_type_ids=token_type_ids) - result = model(input_ids) - - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size)) - - def create_and_check_model_as_decoder( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ): - config.add_cross_attention = True - model = MegaModel(config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - ) - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - encoder_hidden_states=encoder_hidden_states, - ) - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size)) - - def create_and_check_for_causal_lm( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ): - model = MegaForCausalLM(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) - - def 
create_and_check_decoder_model_past_large_inputs( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ): - config.is_decoder = True - config.bidirectional = False - config.add_cross_attention = True - model = MegaForCausalLM(config=config).to(torch_device).eval() - - # make sure that ids don't start with pad token - mask = input_ids.ne(config.pad_token_id).long() - input_ids = input_ids * mask - - # first forward pass - outputs = model( - input_ids, - attention_mask=input_mask, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - use_cache=True, - ) - past_key_values = outputs.past_key_values - - # create hypothetical multiple next token and extent to next_input_ids - next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) - - # make sure that ids don't start with pad token - mask = next_tokens.ne(config.pad_token_id).long() - next_tokens = next_tokens * mask - next_mask = ids_tensor((self.batch_size, 1), vocab_size=2) - - # append to next input_ids and - next_input_ids = torch.cat([input_ids, next_tokens], dim=-1) - next_attention_mask = torch.cat([input_mask, next_mask], dim=-1) - - output_from_no_past = model( - next_input_ids, - attention_mask=next_attention_mask, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - output_hidden_states=True, - )["hidden_states"][0] - output_from_past = model( - next_tokens, - attention_mask=next_mask, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - past_key_values=past_key_values, - output_hidden_states=True, - )["hidden_states"][0] - - # select random slice - random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() - output_from_no_past_slice = output_from_no_past[:, -1:, random_slice_idx].detach() - output_from_past_slice = output_from_past[:, :, random_slice_idx].detach() - - self.parent.assertTrue(output_from_past_slice.shape[1] == next_tokens.shape[1]) - - # test that outputs are equal for slice - self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3)) - - def create_and_check_decoder_model_with_chunking( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ): - config.use_chunking = True - config.output_attentions = True - config.attention_activation = "laplace" - config.chunk_size = input_ids.size(1) * 2 - - model = MegaForCausalLM(config).to(torch_device).eval() - - input_ids = input_ids.repeat(1, 8) - # multiply the sequence length by 8 since we repeat the same ids 8 times in input_ids - input_mask = random_attention_mask([self.batch_size, self.seq_length * 8]) - - result = model(input_ids, attention_mask=input_mask) - - # test if the sequence length of attentions is same provided chunk_size - self.parent.assertEqual(result["attentions"][0].shape[-1], config.chunk_size) - - def create_and_check_for_masked_lm( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = MegaForMaskedLM(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) - - def 
create_and_check_for_token_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = MegaForTokenClassification(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) - - def create_and_check_for_multiple_choice( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_choices = self.num_choices - model = MegaForMultipleChoice(config=config) - model.to(torch_device) - model.eval() - multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - result = model( - multiple_choice_inputs_ids, - attention_mask=multiple_choice_input_mask, - token_type_ids=multiple_choice_token_type_ids, - labels=choice_labels, - ) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices)) - - def create_and_check_for_question_answering( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = MegaForQuestionAnswering(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - start_positions=sequence_labels, - end_positions=sequence_labels, - ) - self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) - self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) - - # extra checks for Mega-specific model functionality - def create_and_check_bidirectionality( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.bidirectional = True - model = MegaModel(config) - model.to(torch_device) - model.eval() - # no mask - result = model(input_ids) - # with mask & token types - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - - self.parent.assertEqual(result[0].shape, (self.batch_size, self.seq_length, self.hidden_size)) - - def check_chunking_shorter_sequence( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.use_chunking = True - config.chunk_size = input_ids.size(1) + 25 - model = MegaModel(config) - model.to(torch_device) - model.eval() - - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - - self.parent.assertEqual(result[0].shape, (self.batch_size, self.seq_length, self.hidden_size)) - - def check_chunking_longer_sequence( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.use_chunking = True - - # we want the chunk size to be < sequence length, and the sequence length to be a multiple of chunk size - config.chunk_size = input_ids.size(1) * 2 - model = MegaModel(config) - model.to(torch_device) - model.eval() - - result = model( - input_ids.repeat(1, 8), - ) - - self.parent.assertEqual(result[0].shape, (self.batch_size, self.seq_length * 8, self.hidden_size)) - - def check_laplace_self_attention( - self, config, input_ids, 
token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.attention_activation = "laplace" - model = MegaModel(config) - model.to(torch_device) - model.eval() - - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - - self.parent.assertEqual(result[0].shape, (self.batch_size, self.seq_length, self.hidden_size)) - - def check_relu2_self_attention( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.attention_activation = "relu2" - model = MegaModel(config) - model.to(torch_device) - model.eval() - - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - - self.parent.assertEqual(result[0].shape, (self.batch_size, self.seq_length, self.hidden_size)) - - def check_sequence_length_beyond_max_positions( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.max_positions = self.seq_length - 2 - model = MegaModel(config) - model.to(torch_device) - model.eval() - - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - - self.parent.assertEqual(result[0].shape, (self.batch_size, self.seq_length, self.hidden_size)) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - - -@require_torch -class MegaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): - all_model_classes = ( - ( - MegaForCausalLM, - MegaForMaskedLM, - MegaModel, - MegaForSequenceClassification, - MegaForTokenClassification, - MegaForMultipleChoice, - MegaForQuestionAnswering, - ) - if is_torch_available() - else () - ) - all_generative_model_classes = (MegaForCausalLM,) if is_torch_available() else () - pipeline_model_mapping = ( - { - "feature-extraction": MegaModel, - "fill-mask": MegaForMaskedLM, - "question-answering": MegaForQuestionAnswering, - "text-classification": MegaForSequenceClassification, - "text-generation": MegaForCausalLM, - "token-classification": MegaForTokenClassification, - "zero-shot": MegaForSequenceClassification, - } - if is_torch_available() - else {} - ) - - fx_compatible = False - test_head_masking = False - test_pruning = False - - def setUp(self): - self.model_tester = MegaModelTester(self) - self.config_tester = ConfigTester(self, config_class=MegaConfig, hidden_size=37) - - # TODO: @ydshieh - @is_flaky(description="Sometimes gives `AssertionError` on expected outputs") - def test_pipeline_fill_mask(self): - super().test_pipeline_fill_mask() - - # TODO: @ydshieh - @is_flaky( - description="Sometimes gives `RuntimeError: probability tensor contains either `inf`, `nan` or element < 0`" - ) - def test_pipeline_text_generation(self): - super().test_pipeline_text_generation() - - def test_config(self): - self.config_tester.run_common_tests() - - def test_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_model(*config_and_inputs) - - def test_model_as_decoder(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder() - self.model_tester.create_and_check_model_as_decoder(*config_and_inputs) - - def 
test_model_as_decoder_with_default_input_mask(self): - # This regression test was failing with PyTorch < 1.3 - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ) = self.model_tester.prepare_config_and_inputs_for_decoder() - - input_mask = None - - self.model_tester.create_and_check_model_as_decoder( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ) - - def test_for_causal_lm(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder() - self.model_tester.create_and_check_for_causal_lm(*config_and_inputs) - - def test_decoder_model_past_with_large_inputs(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder() - self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs) - - def test_decoder_model_with_chunking(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder() - self.model_tester.create_and_check_decoder_model_with_chunking(*config_and_inputs) - - def test_for_masked_lm(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_masked_lm(*config_and_inputs) - - def test_for_token_classification(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_token_classification(*config_and_inputs) - - def test_for_multiple_choice(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs) - - def test_for_question_answering(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_question_answering(*config_and_inputs) - - def test_for_bidirectionality(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_bidirectionality(*config_and_inputs) - - def test_for_chunking_shorter_sequence(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.check_chunking_shorter_sequence(*config_and_inputs) - - def test_for_chunking_longer_sequence(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.check_chunking_longer_sequence(*config_and_inputs) - - def test_for_laplace_attention(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.check_laplace_self_attention(*config_and_inputs) - - def test_for_relu2_attention(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.check_relu2_self_attention(*config_and_inputs) - - def test_for_sequence_length_beyond_max_positions(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.check_sequence_length_beyond_max_positions(*config_and_inputs) - - @require_torch_fp16 - def test_generate_fp16(self): - config, input_ids, _, attention_mask, *_ = self.model_tester.prepare_config_and_inputs_for_decoder() - # attention_mask = torch.LongTensor(input_ids.ne(1)).to(torch_device) - model = MegaForCausalLM(config).eval().to(torch_device) - model.half() - model.generate(input_ids, attention_mask=attention_mask) - model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) - - def 
test_sequence_classification_model(self): - config, input_ids, _, attention_mask, *_ = self.model_tester.prepare_config_and_inputs() - config.num_labels = self.model_tester.num_labels - sequence_labels = ids_tensor([self.model_tester.batch_size], self.model_tester.type_sequence_label_size) - model = MegaForSequenceClassification(config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) - self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) - - def test_sequence_classification_model_for_multi_label(self): - config, input_ids, _, attention_mask, *_ = self.model_tester.prepare_config_and_inputs() - config.num_labels = self.model_tester.num_labels - config.problem_type = "multi_label_classification" - sequence_labels = ids_tensor( - [self.model_tester.batch_size, config.num_labels], self.model_tester.type_sequence_label_size - ).to(torch.float) - model = MegaForSequenceClassification(config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) - self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) - - @slow - def test_model_from_pretrained(self): - model_name = "mnaylor/mega-base-wikitext" - model = MegaModel.from_pretrained(model_name) - self.assertIsNotNone(model) - - @unittest.skip(reason="Does not work on the tiny model as we keep hitting edge cases.") - def test_cpu_offload(self): - super().test_cpu_offload() - - @unittest.skip(reason="Does not work on the tiny model as we keep hitting edge cases.") - def test_disk_offload(self): - super().test_disk_offload() - - @unittest.skip(reason="Does not work on the tiny model as we keep hitting edge cases.") - def test_model_parallelism(self): - super().test_model_parallelism() - - @unittest.skip( - reason=( - "Calling `self.attention_function` in `MegaMovingAverageGatedAttention.forward` changes the submodules on " - "device 1 to device 0 (also changes `requires_grad`). No idea how this could happen for now." - ) - ) - def test_multi_gpu_data_parallel_forward(self): - super().test_multi_gpu_data_parallel_forward() - - @unittest.skip(reason="Tracing of the dynamically computed `MegaMultiDimensionDampedEma._kernel` doesn't work.") - def test_torchscript_simple(self): - super().test_torchscript_simple() - - @unittest.skip(reason="Tracing of the dynamically computed `MegaMultiDimensionDampedEma._kernel` doesn't work.") - def test_torchscript_output_hidden_state(self): - super().test_torchscript_output_hidden_state() - - @unittest.skip(reason="Tracing of the dynamically computed `MegaMultiDimensionDampedEma._kernel` doesn't work.") - def test_torchscript_output_attentions(self): - super().test_torchscript_output_attentions() - - -@require_torch -class MegaModelIntegrationTest(TestCasePlus): - @slow - def test_inference_masked_lm(self): - model = MegaForMaskedLM.from_pretrained("mnaylor/mega-base-wikitext") - - input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]) - with torch.no_grad(): - output = model(input_ids)[0] - expected_shape = torch.Size((1, 11, 50265)) - self.assertEqual(output.shape, expected_shape) - # compare the actual values for a slice. 
- expected_slice = torch.tensor( - [[[67.8389, 10.1470, -32.7148], [-11.1655, 29.1152, 23.1304], [-3.8015, 66.0397, 29.6733]]] - ) - - self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4)) - - @slow - def test_inference_no_head(self): - model = MegaModel.from_pretrained("mnaylor/mega-base-wikitext") - - input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]) - with torch.no_grad(): - output = model(input_ids)[0] - expected_shape = torch.Size((1, 11, 128)) - self.assertEqual(output.shape, expected_shape) - # compare the actual values for a slice. taken from output[:, :3, :3] - expected_slice = torch.tensor( - [[[1.1767, -0.6349, 2.8494], [-0.5109, -0.7745, 1.9495], [-0.3287, -0.2111, 3.3367]]] - ) - - self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4)) diff --git a/tests/models/nat/__init__.py b/tests/models/nat/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/nat/test_modeling_nat.py b/tests/models/nat/test_modeling_nat.py deleted file mode 100644 index c04472620b..0000000000 --- a/tests/models/nat/test_modeling_nat.py +++ /dev/null @@ -1,382 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Testing suite for the PyTorch Nat model.""" - -import collections -import unittest - -from transformers import NatConfig -from transformers.testing_utils import require_natten, require_torch, require_vision, slow, torch_device -from transformers.utils import cached_property, is_torch_available, is_vision_available - -from ...test_backbone_common import BackboneTesterMixin -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_torch_available(): - import torch - from torch import nn - - from transformers import NatBackbone, NatForImageClassification, NatModel - -if is_vision_available(): - from PIL import Image - - from transformers import AutoImageProcessor - - -class NatModelTester: - def __init__( - self, - parent, - batch_size=13, - image_size=64, - patch_size=4, - num_channels=3, - embed_dim=16, - depths=[1, 2, 1], - num_heads=[2, 4, 8], - kernel_size=3, - mlp_ratio=2.0, - qkv_bias=True, - hidden_dropout_prob=0.0, - attention_probs_dropout_prob=0.0, - drop_path_rate=0.1, - hidden_act="gelu", - patch_norm=True, - initializer_range=0.02, - layer_norm_eps=1e-5, - is_training=True, - scope=None, - use_labels=True, - num_labels=10, - out_features=["stage1", "stage2"], - out_indices=[1, 2], - ): - self.parent = parent - self.batch_size = batch_size - self.image_size = image_size - self.patch_size = patch_size - self.num_channels = num_channels - self.embed_dim = embed_dim - self.depths = depths - self.num_heads = num_heads - self.kernel_size = kernel_size - self.mlp_ratio = mlp_ratio - self.qkv_bias = qkv_bias - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.drop_path_rate = drop_path_rate - self.hidden_act = hidden_act - self.patch_norm = patch_norm - self.layer_norm_eps = layer_norm_eps - self.initializer_range = initializer_range - self.is_training = is_training - self.scope = scope - self.use_labels = use_labels - self.num_labels = num_labels - self.out_features = out_features - self.out_indices = out_indices - - def prepare_config_and_inputs(self): - pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) - - labels = None - if self.use_labels: - labels = ids_tensor([self.batch_size], self.num_labels) - - config = self.get_config() - - return config, pixel_values, labels - - def get_config(self): - return NatConfig( - num_labels=self.num_labels, - image_size=self.image_size, - patch_size=self.patch_size, - num_channels=self.num_channels, - embed_dim=self.embed_dim, - depths=self.depths, - num_heads=self.num_heads, - kernel_size=self.kernel_size, - mlp_ratio=self.mlp_ratio, - qkv_bias=self.qkv_bias, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - drop_path_rate=self.drop_path_rate, - hidden_act=self.hidden_act, - patch_norm=self.patch_norm, - layer_norm_eps=self.layer_norm_eps, - initializer_range=self.initializer_range, - out_features=self.out_features, - out_indices=self.out_indices, - ) - - def create_and_check_model(self, config, pixel_values, labels): - model = NatModel(config=config) - model.to(torch_device) - model.eval() - result = model(pixel_values) - - expected_height = expected_width = (config.image_size // config.patch_size) // (2 ** (len(config.depths) - 1)) - expected_dim = int(config.embed_dim * 2 ** (len(config.depths) 
- 1)) - - self.parent.assertEqual( - result.last_hidden_state.shape, (self.batch_size, expected_height, expected_width, expected_dim) - ) - - def create_and_check_for_image_classification(self, config, pixel_values, labels): - model = NatForImageClassification(config) - model.to(torch_device) - model.eval() - result = model(pixel_values, labels=labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels)) - - # test greyscale images - config.num_channels = 1 - model = NatForImageClassification(config) - model.to(torch_device) - model.eval() - - pixel_values = floats_tensor([self.batch_size, 1, self.image_size, self.image_size]) - result = model(pixel_values) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels)) - - def create_and_check_backbone(self, config, pixel_values, labels): - model = NatBackbone(config=config) - model.to(torch_device) - model.eval() - result = model(pixel_values) - - # verify hidden states - self.parent.assertEqual(len(result.feature_maps), len(config.out_features)) - self.parent.assertListEqual(list(result.feature_maps[0].shape), [self.batch_size, model.channels[0], 16, 16]) - - # verify channels - self.parent.assertEqual(len(model.channels), len(config.out_features)) - - # verify backbone works with out_features=None - config.out_features = None - model = NatBackbone(config=config) - model.to(torch_device) - model.eval() - result = model(pixel_values) - - # verify feature maps - self.parent.assertEqual(len(result.feature_maps), 1) - self.parent.assertListEqual(list(result.feature_maps[0].shape), [self.batch_size, model.channels[-1], 4, 4]) - - # verify channels - self.parent.assertEqual(len(model.channels), 1) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - config, pixel_values, labels = config_and_inputs - inputs_dict = {"pixel_values": pixel_values} - return config, inputs_dict - - -@require_natten -@require_torch -class NatModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): - all_model_classes = ( - ( - NatModel, - NatForImageClassification, - NatBackbone, - ) - if is_torch_available() - else () - ) - pipeline_model_mapping = ( - {"image-feature-extraction": NatModel, "image-classification": NatForImageClassification} - if is_torch_available() - else {} - ) - fx_compatible = False - - test_torchscript = False - test_pruning = False - test_resize_embeddings = False - test_head_masking = False - - def setUp(self): - self.model_tester = NatModelTester(self) - self.config_tester = ConfigTester(self, config_class=NatConfig, embed_dim=37) - - def test_config(self): - self.create_and_test_config_common_properties() - self.config_tester.create_and_test_config_to_json_string() - self.config_tester.create_and_test_config_to_json_file() - self.config_tester.create_and_test_config_from_and_save_pretrained() - self.config_tester.create_and_test_config_with_num_labels() - self.config_tester.check_config_can_be_init_without_params() - self.config_tester.check_config_arguments_init() - - def create_and_test_config_common_properties(self): - return - - def test_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_model(*config_and_inputs) - - def test_for_image_classification(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_image_classification(*config_and_inputs) - - def test_backbone(self): - config_and_inputs = 
self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_backbone(*config_and_inputs) - - @unittest.skip(reason="Nat does not use inputs_embeds") - def test_inputs_embeds(self): - pass - - @unittest.skip(reason="Nat does not use feedforward chunking") - def test_feed_forward_chunking(self): - pass - - def test_model_common_attributes(self): - config, _ = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - model = model_class(config) - self.assertIsInstance(model.get_input_embeddings(), (nn.Module)) - x = model.get_output_embeddings() - self.assertTrue(x is None or isinstance(x, nn.Linear)) - - def test_attention_outputs(self): - self.skipTest("Nat's attention operation is handled entirely by NATTEN.") - - def check_hidden_states_output(self, inputs_dict, config, model_class, image_size): - model = model_class(config) - model.to(torch_device) - model.eval() - - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - hidden_states = outputs.hidden_states - - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", len(self.model_tester.depths) + 1 - ) - self.assertEqual(len(hidden_states), expected_num_layers) - - # Nat has a different seq_length - patch_size = ( - config.patch_size - if isinstance(config.patch_size, collections.abc.Iterable) - else (config.patch_size, config.patch_size) - ) - - height = image_size[0] // patch_size[0] - width = image_size[1] // patch_size[1] - - self.assertListEqual( - list(hidden_states[0].shape[-3:]), - [height, width, self.model_tester.embed_dim], - ) - - if model_class.__name__ != "NatBackbone": - reshaped_hidden_states = outputs.reshaped_hidden_states - self.assertEqual(len(reshaped_hidden_states), expected_num_layers) - - batch_size, num_channels, height, width = reshaped_hidden_states[0].shape - reshaped_hidden_states = ( - reshaped_hidden_states[0].view(batch_size, num_channels, height, width).permute(0, 2, 3, 1) - ) - self.assertListEqual( - list(reshaped_hidden_states.shape[-3:]), - [height, width, self.model_tester.embed_dim], - ) - - def test_hidden_states_output(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - image_size = ( - self.model_tester.image_size - if isinstance(self.model_tester.image_size, collections.abc.Iterable) - else (self.model_tester.image_size, self.model_tester.image_size) - ) - - for model_class in self.all_model_classes: - inputs_dict["output_hidden_states"] = True - self.check_hidden_states_output(inputs_dict, config, model_class, image_size) - - # check that output_hidden_states also work using config - del inputs_dict["output_hidden_states"] - config.output_hidden_states = True - - self.check_hidden_states_output(inputs_dict, config, model_class, image_size) - - @slow - def test_model_from_pretrained(self): - model_name = "shi-labs/nat-mini-in1k-224" - model = NatModel.from_pretrained(model_name) - self.assertIsNotNone(model) - - def test_initialization(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - configs_no_init = _config_zero_init(config) - for model_class in self.all_model_classes: - model = model_class(config=configs_no_init) - for name, param in model.named_parameters(): - if "embeddings" not in name and param.requires_grad: - self.assertIn( - ((param.data.mean() * 1e9).round() / 1e9).item(), - [0.0, 1.0], - msg=f"Parameter {name} of model {model_class} seems not properly 
initialized", - ) - - -@require_natten -@require_vision -@require_torch -class NatModelIntegrationTest(unittest.TestCase): - @cached_property - def default_image_processor(self): - return AutoImageProcessor.from_pretrained("shi-labs/nat-mini-in1k-224") if is_vision_available() else None - - @slow - def test_inference_image_classification_head(self): - model = NatForImageClassification.from_pretrained("shi-labs/nat-mini-in1k-224").to(torch_device) - image_processor = self.default_image_processor - - image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") - inputs = image_processor(images=image, return_tensors="pt").to(torch_device) - - # forward pass - with torch.no_grad(): - outputs = model(**inputs) - - # verify the logits - expected_shape = torch.Size((1, 1000)) - self.assertEqual(outputs.logits.shape, expected_shape) - expected_slice = torch.tensor([0.3805, -0.8676, -0.3912]).to(torch_device) - self.assertTrue(torch.allclose(outputs.logits[0, :3], expected_slice, atol=1e-4)) - - -@require_torch -@require_natten -class NatBackboneTest(unittest.TestCase, BackboneTesterMixin): - all_model_classes = (NatBackbone,) if is_torch_available() else () - config_class = NatConfig - - def setUp(self): - self.model_tester = NatModelTester(self) diff --git a/tests/models/nezha/__init__.py b/tests/models/nezha/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/nezha/test_modeling_nezha.py b/tests/models/nezha/test_modeling_nezha.py deleted file mode 100644 index 311866758b..0000000000 --- a/tests/models/nezha/test_modeling_nezha.py +++ /dev/null @@ -1,489 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import tempfile -import unittest - -from transformers import NezhaConfig, is_torch_available -from transformers.models.auto import get_values -from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device - -from ...generation.test_utils import GenerationTesterMixin -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_torch_available(): - import torch - - from transformers import ( - MODEL_FOR_PRETRAINING_MAPPING, - NezhaForMaskedLM, - NezhaForMultipleChoice, - NezhaForNextSentencePrediction, - NezhaForPreTraining, - NezhaForQuestionAnswering, - NezhaForSequenceClassification, - NezhaForTokenClassification, - NezhaModel, - ) - - -class NezhaModelTester: - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=2, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=128, - max_relative_position=32, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = random_attention_mask([self.batch_size, self.seq_length]) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = self.get_config() - - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - - def get_config(self): - """ - Returns a tiny configuration by default. 
- """ - return NezhaConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - is_decoder=False, - initializer_range=self.initializer_range, - ) - - def prepare_config_and_inputs_for_decoder(self): - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = self.prepare_config_and_inputs() - - config.is_decoder = True - encoder_hidden_states = floats_tensor([self.batch_size, self.seq_length, self.hidden_size]) - encoder_attention_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - return ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ) - - def create_and_check_model( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = NezhaModel(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - result = model(input_ids, token_type_ids=token_type_ids) - result = model(input_ids) - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size)) - - def create_and_check_model_as_decoder( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ): - config.add_cross_attention = True - model = NezhaModel(config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - ) - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - encoder_hidden_states=encoder_hidden_states, - ) - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size)) - - def create_and_check_for_masked_lm( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = NezhaForMaskedLM(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) - - def create_and_check_for_next_sequence_prediction( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = NezhaForNextSentencePrediction(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - labels=sequence_labels, - ) - self.parent.assertEqual(result.logits.shape, (self.batch_size, 2)) - - def 
create_and_check_for_pretraining( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = NezhaForPreTraining(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - labels=token_labels, - next_sentence_label=sequence_labels, - ) - self.parent.assertEqual(result.prediction_logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) - self.parent.assertEqual(result.seq_relationship_logits.shape, (self.batch_size, 2)) - - def create_and_check_for_question_answering( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = NezhaForQuestionAnswering(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - start_positions=sequence_labels, - end_positions=sequence_labels, - ) - self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) - self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) - - def create_and_check_for_sequence_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = NezhaForSequenceClassification(config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels)) - - def create_and_check_for_token_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = NezhaForTokenClassification(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) - - def create_and_check_for_multiple_choice( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_choices = self.num_choices - model = NezhaForMultipleChoice(config=config) - model.to(torch_device) - model.eval() - multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - result = model( - multiple_choice_inputs_ids, - attention_mask=multiple_choice_input_mask, - token_type_ids=multiple_choice_token_type_ids, - labels=choice_labels, - ) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices)) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - - -@require_torch -class NezhaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): - all_model_classes = ( - ( - NezhaModel, - NezhaForMaskedLM, - 
NezhaForMultipleChoice, - NezhaForNextSentencePrediction, - NezhaForPreTraining, - NezhaForQuestionAnswering, - NezhaForSequenceClassification, - NezhaForTokenClassification, - ) - if is_torch_available() - else () - ) - pipeline_model_mapping = ( - { - "feature-extraction": NezhaModel, - "fill-mask": NezhaForMaskedLM, - "question-answering": NezhaForQuestionAnswering, - "text-classification": NezhaForSequenceClassification, - "token-classification": NezhaForTokenClassification, - "zero-shot": NezhaForSequenceClassification, - } - if is_torch_available() - else {} - ) - fx_compatible = True - - # special case for ForPreTraining model - def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): - inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) - - if return_labels: - if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING): - inputs_dict["labels"] = torch.zeros( - (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device - ) - inputs_dict["next_sentence_label"] = torch.zeros( - self.model_tester.batch_size, dtype=torch.long, device=torch_device - ) - return inputs_dict - - def setUp(self): - self.model_tester = NezhaModelTester(self) - self.config_tester = ConfigTester(self, config_class=NezhaConfig, hidden_size=37) - - def test_config(self): - self.config_tester.run_common_tests() - - def test_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_model(*config_and_inputs) - - def test_model_as_decoder(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder() - self.model_tester.create_and_check_model_as_decoder(*config_and_inputs) - - def test_model_as_decoder_with_default_input_mask(self): - # This regression test was failing with PyTorch < 1.3 - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ) = self.model_tester.prepare_config_and_inputs_for_decoder() - - input_mask = None - - self.model_tester.create_and_check_model_as_decoder( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ) - - def test_for_masked_lm(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_masked_lm(*config_and_inputs) - - def test_for_multiple_choice(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs) - - def test_for_next_sequence_prediction(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_next_sequence_prediction(*config_and_inputs) - - def test_for_pretraining(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_pretraining(*config_and_inputs) - - def test_for_question_answering(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_question_answering(*config_and_inputs) - - def test_for_sequence_classification(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs) - - def test_for_token_classification(self): - config_and_inputs = 
self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_token_classification(*config_and_inputs) - - @slow - def test_model_from_pretrained(self): - model_name = "sijunhe/nezha-cn-base" - model = NezhaModel.from_pretrained(model_name) - self.assertIsNotNone(model) - - @slow - @require_torch_gpu - def test_torchscript_device_change(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - for model_class in self.all_model_classes: - # NezhaForMultipleChoice behaves incorrectly in JIT environments. - if model_class == NezhaForMultipleChoice: - return - - config.torchscript = True - model = model_class(config=config) - - inputs_dict = self._prepare_for_class(inputs_dict, model_class) - traced_model = torch.jit.trace( - model, (inputs_dict["input_ids"].to("cpu"), inputs_dict["attention_mask"].to("cpu")) - ) - - with tempfile.TemporaryDirectory() as tmp: - torch.jit.save(traced_model, os.path.join(tmp, "bert.pt")) - loaded = torch.jit.load(os.path.join(tmp, "bert.pt"), map_location=torch_device) - loaded(inputs_dict["input_ids"].to(torch_device), inputs_dict["attention_mask"].to(torch_device)) - - -@require_torch -class NezhaModelIntegrationTest(unittest.TestCase): - @slow - def test_inference_nezha_model(self): - model = NezhaModel.from_pretrained("sijunhe/nezha-cn-base") - input_ids = torch.tensor([[0, 1, 2, 3, 4, 5]]) - attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1]]) - with torch.no_grad(): - output = model(input_ids, attention_mask=attention_mask)[0] - expected_shape = torch.Size((1, 6, 768)) - self.assertEqual(output.shape, expected_shape) - expected_slice = torch.tensor([[[0.0685, 0.2441, 0.1102], [0.0600, 0.1906, 0.1349], [0.0221, 0.0819, 0.0586]]]) - - self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4)) - - @slow - def test_inference_nezha_masked_lm(self): - model = NezhaForMaskedLM.from_pretrained("sijunhe/nezha-cn-base") - input_ids = torch.tensor([[0, 1, 2, 3, 4, 5]]) - attention_mask = torch.tensor([[1, 1, 1, 1, 1, 1]]) - with torch.no_grad(): - output = model(input_ids, attention_mask=attention_mask)[0] - expected_shape = torch.Size((1, 6, 21128)) - self.assertEqual(output.shape, expected_shape) - expected_slice = torch.tensor( - [[-2.7939, -1.7902, -2.2189], [-2.8585, -1.8908, -2.3723], [-2.6499, -1.7750, -2.2558]] - ) - - self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4)) diff --git a/tests/models/qdqbert/__init__.py b/tests/models/qdqbert/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/qdqbert/test_modeling_qdqbert.py b/tests/models/qdqbert/test_modeling_qdqbert.py deleted file mode 100644 index 96e63834ec..0000000000 --- a/tests/models/qdqbert/test_modeling_qdqbert.py +++ /dev/null @@ -1,573 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The HuggingFace Inc. team. All rights reserved. -# Copyright 2021 NVIDIA Corporation. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Testing suite for the PyTorch QDQBERT model.""" - -import unittest - -from transformers import QDQBertConfig, is_torch_available -from transformers.testing_utils import require_pytorch_quantization, require_torch, slow, torch_device - -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_torch_available(): - import torch - - from transformers import ( - QDQBertForMaskedLM, - QDQBertForMultipleChoice, - QDQBertForNextSentencePrediction, - QDQBertForQuestionAnswering, - QDQBertForSequenceClassification, - QDQBertForTokenClassification, - QDQBertLMHeadModel, - QDQBertModel, - ) - - -class QDQBertModelTester: - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=2, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - # Set default quantizers before creating the model. - import pytorch_quantization.nn as quant_nn - from pytorch_quantization.tensor_quant import QuantDescriptor - - # The default tensor quantizer is set to use Max calibration method - input_desc = QuantDescriptor(num_bits=8, calib_method="max") - # The default tensor quantizer is set to be per-channel quantization for weights - weight_desc = QuantDescriptor(num_bits=8, axis=((0,))) - quant_nn.QuantLinear.set_default_quant_desc_input(input_desc) - quant_nn.QuantLinear.set_default_quant_desc_weight(weight_desc) - # For the test cases, since QDQBert model is tested in one run without calibration, the quantized tensors are set as fake quantized tensors which give float type tensors in the end. 
- quant_nn.TensorQuantizer.use_fb_fake_quant = True - - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = random_attention_mask([self.batch_size, self.seq_length]) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = self.get_config() - - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - - def get_config(self): - return QDQBertConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - is_decoder=False, - initializer_range=self.initializer_range, - ) - - def prepare_config_and_inputs_for_decoder(self): - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = self.prepare_config_and_inputs() - - config.is_decoder = True - encoder_hidden_states = floats_tensor([self.batch_size, self.seq_length, self.hidden_size]) - encoder_attention_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - return ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ) - - def create_and_check_model( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = QDQBertModel(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - result = model(input_ids, token_type_ids=token_type_ids) - result = model(input_ids) - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - - def create_and_check_model_as_decoder( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ): - config.add_cross_attention = True - model = QDQBertModel(config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - ) - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - encoder_hidden_states=encoder_hidden_states, - ) - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - - def create_and_check_for_causal_lm( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - 
encoder_attention_mask, - ): - model = QDQBertLMHeadModel(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) - - def create_and_check_for_masked_lm( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = QDQBertForMaskedLM(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) - - def create_and_check_model_for_causal_lm_as_decoder( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ): - config.add_cross_attention = True - model = QDQBertLMHeadModel(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - labels=token_labels, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - ) - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - labels=token_labels, - encoder_hidden_states=encoder_hidden_states, - ) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) - - def create_and_check_decoder_model_past_large_inputs( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ): - config.is_decoder = True - config.add_cross_attention = True - model = QDQBertLMHeadModel(config=config) - model.to(torch_device) - model.eval() - - # first forward pass - outputs = model( - input_ids, - attention_mask=input_mask, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - use_cache=True, - ) - past_key_values = outputs.past_key_values - - # create hypothetical multiple next token and extent to next_input_ids - next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size) - next_mask = ids_tensor((self.batch_size, 3), vocab_size=2) - - # append to next input_ids and - next_input_ids = torch.cat([input_ids, next_tokens], dim=-1) - next_attention_mask = torch.cat([input_mask, next_mask], dim=-1) - - output_from_no_past = model( - next_input_ids, - attention_mask=next_attention_mask, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - output_hidden_states=True, - )["hidden_states"][0] - output_from_past = model( - next_tokens, - attention_mask=next_attention_mask, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - past_key_values=past_key_values, - output_hidden_states=True, - )["hidden_states"][0] - - # select random slice - random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() - output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx].detach() - output_from_past_slice = output_from_past[:, :, random_slice_idx].detach() - - self.parent.assertTrue(output_from_past_slice.shape[1] == next_tokens.shape[1]) - - # test that outputs are equal for slice - self.parent.assertTrue(torch.allclose(output_from_past_slice, 
output_from_no_past_slice, atol=1e-3)) - - def create_and_check_for_next_sequence_prediction( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = QDQBertForNextSentencePrediction(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - labels=sequence_labels, - ) - self.parent.assertEqual(result.logits.shape, (self.batch_size, 2)) - - def create_and_check_for_question_answering( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = QDQBertForQuestionAnswering(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - start_positions=sequence_labels, - end_positions=sequence_labels, - ) - self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) - self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) - - def create_and_check_for_sequence_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = QDQBertForSequenceClassification(config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels)) - - def create_and_check_for_token_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = QDQBertForTokenClassification(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) - - def create_and_check_for_multiple_choice( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_choices = self.num_choices - model = QDQBertForMultipleChoice(config=config) - model.to(torch_device) - model.eval() - multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - result = model( - multiple_choice_inputs_ids, - attention_mask=multiple_choice_input_mask, - token_type_ids=multiple_choice_token_type_ids, - labels=choice_labels, - ) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices)) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - - -@require_torch -@require_pytorch_quantization -class QDQBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): - all_model_classes = ( - ( - QDQBertModel, - QDQBertForMaskedLM, - QDQBertForMultipleChoice, - QDQBertForNextSentencePrediction, - 
QDQBertForQuestionAnswering, - QDQBertForSequenceClassification, - QDQBertForTokenClassification, - QDQBertLMHeadModel, - ) - if is_torch_available() - else () - ) - all_generative_model_classes = (QDQBertLMHeadModel,) if is_torch_available() else () - pipeline_model_mapping = ( - { - "feature-extraction": QDQBertModel, - "fill-mask": QDQBertForMaskedLM, - "question-answering": QDQBertForQuestionAnswering, - "text-classification": QDQBertForSequenceClassification, - "text-generation": QDQBertLMHeadModel, - "token-classification": QDQBertForTokenClassification, - "zero-shot": QDQBertForSequenceClassification, - } - if is_torch_available() - else {} - ) - - def setUp(self): - self.model_tester = QDQBertModelTester(self) - self.config_tester = ConfigTester(self, config_class=QDQBertConfig, hidden_size=37) - - def test_config(self): - self.config_tester.run_common_tests() - - def test_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_model(*config_and_inputs) - - def test_model_various_embeddings(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - for type in ["absolute", "relative_key", "relative_key_query"]: - config_and_inputs[0].position_embedding_type = type - self.model_tester.create_and_check_model(*config_and_inputs) - - def test_model_as_decoder(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder() - self.model_tester.create_and_check_model_as_decoder(*config_and_inputs) - - def test_model_as_decoder_with_default_input_mask(self): - # This regression test was failing with PyTorch < 1.3 - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ) = self.model_tester.prepare_config_and_inputs_for_decoder() - - input_mask = None - - self.model_tester.create_and_check_model_as_decoder( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ) - - def test_for_causal_lm(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder() - self.model_tester.create_and_check_for_causal_lm(*config_and_inputs) - - def test_for_masked_lm(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_masked_lm(*config_and_inputs) - - def test_for_causal_lm_decoder(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder() - self.model_tester.create_and_check_model_for_causal_lm_as_decoder(*config_and_inputs) - - def test_decoder_model_past_with_large_inputs(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder() - self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs) - - def test_for_multiple_choice(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs) - - def test_for_next_sequence_prediction(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_next_sequence_prediction(*config_and_inputs) - - def test_for_question_answering(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_question_answering(*config_and_inputs) - - def test_for_sequence_classification(self): - config_and_inputs = 
self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs) - - def test_for_token_classification(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_token_classification(*config_and_inputs) - - @slow - def test_model_from_pretrained(self): - model_name = "google-bert/bert-base-uncased" - model = QDQBertModel.from_pretrained(model_name) - self.assertIsNotNone(model) - - # Override - def test_feed_forward_chunking(self): - # feed forward chunking is not supported in QDQBert - pass - - -@require_torch -@require_pytorch_quantization -class QDQBertModelIntegrationTest(unittest.TestCase): - @slow - def test_inference_no_head_absolute_embedding(self): - # Set default quantizers before creating the model. - import pytorch_quantization.nn as quant_nn - from pytorch_quantization.tensor_quant import QuantDescriptor - - # The default tensor quantizer is set to use Max calibration method - input_desc = QuantDescriptor(num_bits=8, calib_method="max") - # The default tensor quantizer is set to be per-channel quantization for weights - weight_desc = QuantDescriptor(num_bits=8, axis=((0,))) - quant_nn.QuantLinear.set_default_quant_desc_input(input_desc) - quant_nn.QuantLinear.set_default_quant_desc_weight(weight_desc) - - model = QDQBertModel.from_pretrained("google-bert/bert-base-uncased") - input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]]) - attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]) - output = model(input_ids, attention_mask=attention_mask)[0] - expected_shape = torch.Size((1, 11, 768)) - self.assertEqual(output.shape, expected_shape) - expected_slice = torch.tensor( - [[[0.4571, -0.0735, 0.8594], [0.2774, -0.0278, 0.8794], [0.3548, -0.0473, 0.7593]]] - ) - self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4)) diff --git a/tests/models/realm/__init__.py b/tests/models/realm/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/realm/test_modeling_realm.py b/tests/models/realm/test_modeling_realm.py deleted file mode 100644 index 07a3b9d4b3..0000000000 --- a/tests/models/realm/test_modeling_realm.py +++ /dev/null @@ -1,554 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Testing suite for the PyTorch REALM model.""" - -import copy -import unittest - -import numpy as np - -from transformers import RealmConfig, is_torch_available -from transformers.testing_utils import require_torch, slow, torch_device - -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_torch_available(): - import torch - - from transformers import ( - RealmEmbedder, - RealmForOpenQA, - RealmKnowledgeAugEncoder, - RealmReader, - RealmRetriever, - RealmScorer, - RealmTokenizer, - ) - - -class RealmModelTester: - def __init__( - self, - parent, - batch_size=13, - retriever_proj_size=128, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=2, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - layer_norm_eps=1e-12, - span_hidden_size=50, - max_span_width=10, - reader_layer_norm_eps=1e-3, - reader_beam_size=4, - reader_seq_len=288 + 32, - num_block_records=13353718, - searcher_beam_size=8, - searcher_seq_len=64, - num_labels=3, - num_choices=4, - num_candidates=10, - scope=None, - ): - # General config - self.parent = parent - self.batch_size = batch_size - self.retriever_proj_size = retriever_proj_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.layer_norm_eps = layer_norm_eps - - # Reader config - self.span_hidden_size = span_hidden_size - self.max_span_width = max_span_width - self.reader_layer_norm_eps = reader_layer_norm_eps - self.reader_beam_size = reader_beam_size - self.reader_seq_len = reader_seq_len - - # Searcher config - self.num_block_records = num_block_records - self.searcher_beam_size = searcher_beam_size - self.searcher_seq_len = searcher_seq_len - - self.num_labels = num_labels - self.num_choices = num_choices - self.num_candidates = num_candidates - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - candiate_input_ids = ids_tensor([self.batch_size, self.num_candidates, self.seq_length], self.vocab_size) - reader_input_ids = ids_tensor([self.reader_beam_size, self.reader_seq_len], self.vocab_size) - - input_mask = None - candiate_input_mask = None - reader_input_mask = None - if self.use_input_mask: - input_mask = random_attention_mask([self.batch_size, self.seq_length]) - candiate_input_mask = random_attention_mask([self.batch_size, self.num_candidates, self.seq_length]) - reader_input_mask = 
random_attention_mask([self.reader_beam_size, self.reader_seq_len]) - - token_type_ids = None - candidate_token_type_ids = None - reader_token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - candidate_token_type_ids = ids_tensor( - [self.batch_size, self.num_candidates, self.seq_length], self.type_vocab_size - ) - reader_token_type_ids = ids_tensor([self.reader_beam_size, self.reader_seq_len], self.type_vocab_size) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = self.get_config() - - # inputs with additional num_candidates axis. - scorer_encoder_inputs = (candiate_input_ids, candiate_input_mask, candidate_token_type_ids) - # reader inputs - reader_inputs = (reader_input_ids, reader_input_mask, reader_token_type_ids) - - return ( - config, - input_ids, - token_type_ids, - input_mask, - scorer_encoder_inputs, - reader_inputs, - sequence_labels, - token_labels, - choice_labels, - ) - - def get_config(self): - return RealmConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - retriever_proj_size=self.retriever_proj_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - num_candidates=self.num_candidates, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - initializer_range=self.initializer_range, - ) - - def create_and_check_embedder( - self, - config, - input_ids, - token_type_ids, - input_mask, - scorer_encoder_inputs, - reader_inputs, - sequence_labels, - token_labels, - choice_labels, - ): - model = RealmEmbedder(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - self.parent.assertEqual(result.projected_score.shape, (self.batch_size, self.retriever_proj_size)) - - def create_and_check_encoder( - self, - config, - input_ids, - token_type_ids, - input_mask, - scorer_encoder_inputs, - reader_inputs, - sequence_labels, - token_labels, - choice_labels, - ): - model = RealmKnowledgeAugEncoder(config=config) - model.to(torch_device) - model.eval() - relevance_score = floats_tensor([self.batch_size, self.num_candidates]) - result = model( - scorer_encoder_inputs[0], - attention_mask=scorer_encoder_inputs[1], - token_type_ids=scorer_encoder_inputs[2], - relevance_score=relevance_score, - labels=token_labels, - ) - self.parent.assertEqual( - result.logits.shape, (self.batch_size * self.num_candidates, self.seq_length, self.vocab_size) - ) - - def create_and_check_reader( - self, - config, - input_ids, - token_type_ids, - input_mask, - scorer_encoder_inputs, - reader_inputs, - sequence_labels, - token_labels, - choice_labels, - ): - model = RealmReader(config=config) - model.to(torch_device) - model.eval() - relevance_score = floats_tensor([self.reader_beam_size]) - result = model( - reader_inputs[0], - attention_mask=reader_inputs[1], - token_type_ids=reader_inputs[2], - relevance_score=relevance_score, - ) - 
self.parent.assertEqual(result.block_idx.shape, ()) - self.parent.assertEqual(result.candidate.shape, ()) - self.parent.assertEqual(result.start_pos.shape, ()) - self.parent.assertEqual(result.end_pos.shape, ()) - - def create_and_check_scorer( - self, - config, - input_ids, - token_type_ids, - input_mask, - scorer_encoder_inputs, - reader_inputs, - sequence_labels, - token_labels, - choice_labels, - ): - model = RealmScorer(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - candidate_input_ids=scorer_encoder_inputs[0], - candidate_attention_mask=scorer_encoder_inputs[1], - candidate_token_type_ids=scorer_encoder_inputs[2], - ) - self.parent.assertEqual(result.relevance_score.shape, (self.batch_size, self.num_candidates)) - self.parent.assertEqual(result.query_score.shape, (self.batch_size, self.retriever_proj_size)) - self.parent.assertEqual( - result.candidate_score.shape, (self.batch_size, self.num_candidates, self.retriever_proj_size) - ) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - scorer_encoder_inputs, - reader_inputs, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - - -@require_torch -class RealmModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): - all_model_classes = ( - ( - RealmEmbedder, - RealmKnowledgeAugEncoder, - # RealmScorer is excluded from common tests as it is a container model - # consisting of two RealmEmbedders & a simple inner product calculation. 
- # RealmScorer - ) - if is_torch_available() - else () - ) - all_generative_model_classes = () - pipeline_model_mapping = {} if is_torch_available() else {} - - # disable these tests because there is no base_model in Realm - test_save_load_fast_init_from_base = False - test_save_load_fast_init_to_base = False - - def setUp(self): - self.test_pruning = False - self.model_tester = RealmModelTester(self) - self.config_tester = ConfigTester(self, config_class=RealmConfig) - - def test_config(self): - self.config_tester.run_common_tests() - - def test_embedder(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_embedder(*config_and_inputs) - - def test_encoder(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_encoder(*config_and_inputs) - - def test_model_various_embeddings(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - for type in ["absolute", "relative_key", "relative_key_query"]: - config_and_inputs[0].position_embedding_type = type - self.model_tester.create_and_check_embedder(*config_and_inputs) - self.model_tester.create_and_check_encoder(*config_and_inputs) - - def test_scorer(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_scorer(*config_and_inputs) - - def test_training(self): - if not self.model_tester.is_training: - return - - config, *inputs = self.model_tester.prepare_config_and_inputs() - input_ids, token_type_ids, input_mask, scorer_encoder_inputs = inputs[0:4] - config.return_dict = True - - tokenizer = RealmTokenizer.from_pretrained("google/realm-orqa-nq-openqa") - - # RealmKnowledgeAugEncoder training - model = RealmKnowledgeAugEncoder(config) - model.to(torch_device) - model.train() - - inputs_dict = { - "input_ids": scorer_encoder_inputs[0].to(torch_device), - "attention_mask": scorer_encoder_inputs[1].to(torch_device), - "token_type_ids": scorer_encoder_inputs[2].to(torch_device), - "relevance_score": floats_tensor([self.model_tester.batch_size, self.model_tester.num_candidates]), - } - inputs_dict["labels"] = torch.zeros( - (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device - ) - inputs = inputs_dict - loss = model(**inputs).loss - loss.backward() - - # RealmForOpenQA training - openqa_config = copy.deepcopy(config) - openqa_config.vocab_size = 30522 # the retrieved texts will inevitably have more than 99 vocabs. 
- openqa_config.num_block_records = 5 - openqa_config.searcher_beam_size = 2 - - block_records = np.array( - [ - b"This is the first record.", - b"This is the second record.", - b"This is the third record.", - b"This is the fourth record.", - b"This is the fifth record.", - ], - dtype=object, - ) - retriever = RealmRetriever(block_records, tokenizer) - model = RealmForOpenQA(openqa_config, retriever) - model.to(torch_device) - model.train() - - inputs_dict = { - "input_ids": input_ids[:1].to(torch_device), - "attention_mask": input_mask[:1].to(torch_device), - "token_type_ids": token_type_ids[:1].to(torch_device), - "answer_ids": input_ids[:1].tolist(), - } - inputs = self._prepare_for_class(inputs_dict, RealmForOpenQA) - loss = model(**inputs).reader_output.loss - loss.backward() - - # Test model.block_embedding_to - device = torch.device("cpu") - model.block_embedding_to(device) - loss = model(**inputs).reader_output.loss - loss.backward() - self.assertEqual(model.block_emb.device.type, device.type) - - @slow - def test_embedder_from_pretrained(self): - model = RealmEmbedder.from_pretrained("google/realm-cc-news-pretrained-embedder") - self.assertIsNotNone(model) - - @slow - def test_encoder_from_pretrained(self): - model = RealmKnowledgeAugEncoder.from_pretrained("google/realm-cc-news-pretrained-encoder") - self.assertIsNotNone(model) - - @slow - def test_open_qa_from_pretrained(self): - model = RealmForOpenQA.from_pretrained("google/realm-orqa-nq-openqa") - self.assertIsNotNone(model) - - @slow - def test_reader_from_pretrained(self): - model = RealmReader.from_pretrained("google/realm-orqa-nq-reader") - self.assertIsNotNone(model) - - @slow - def test_scorer_from_pretrained(self): - model = RealmScorer.from_pretrained("google/realm-cc-news-pretrained-scorer") - self.assertIsNotNone(model) - - -@require_torch -class RealmModelIntegrationTest(unittest.TestCase): - @slow - def test_inference_embedder(self): - retriever_projected_size = 128 - - model = RealmEmbedder.from_pretrained("google/realm-cc-news-pretrained-embedder") - input_ids = torch.tensor([[0, 1, 2, 3, 4, 5]]) - output = model(input_ids)[0] - - expected_shape = torch.Size((1, retriever_projected_size)) - self.assertEqual(output.shape, expected_shape) - - expected_slice = torch.tensor([[-0.0714, -0.0837, -0.1314]]) - self.assertTrue(torch.allclose(output[:, :3], expected_slice, atol=1e-4)) - - @slow - def test_inference_encoder(self): - num_candidates = 2 - vocab_size = 30522 - - model = RealmKnowledgeAugEncoder.from_pretrained( - "google/realm-cc-news-pretrained-encoder", num_candidates=num_candidates - ) - input_ids = torch.tensor([[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]]) - relevance_score = torch.tensor([[0.3, 0.7]], dtype=torch.float32) - output = model(input_ids, relevance_score=relevance_score)[0] - - expected_shape = torch.Size((2, 6, vocab_size)) - self.assertEqual(output.shape, expected_shape) - - expected_slice = torch.tensor([[[-11.0888, -11.2544], [-10.2170, -10.3874]]]) - - self.assertTrue(torch.allclose(output[1, :2, :2], expected_slice, atol=1e-4)) - - @slow - def test_inference_open_qa(self): - from transformers.models.realm.retrieval_realm import RealmRetriever - - tokenizer = RealmTokenizer.from_pretrained("google/realm-orqa-nq-openqa") - retriever = RealmRetriever.from_pretrained("google/realm-orqa-nq-openqa") - - model = RealmForOpenQA.from_pretrained( - "google/realm-orqa-nq-openqa", - retriever=retriever, - ) - - question = "Who is the pioneer in modern computer science?" 
- - question = tokenizer( - [question], - padding=True, - truncation=True, - max_length=model.config.searcher_seq_len, - return_tensors="pt", - ).to(model.device) - - predicted_answer_ids = model(**question).predicted_answer_ids - - predicted_answer = tokenizer.decode(predicted_answer_ids) - self.assertEqual(predicted_answer, "alan mathison turing") - - @slow - def test_inference_reader(self): - config = RealmConfig(reader_beam_size=2, max_span_width=3) - model = RealmReader.from_pretrained("google/realm-orqa-nq-reader", config=config) - - concat_input_ids = torch.arange(10).view((2, 5)) - concat_token_type_ids = torch.tensor([[0, 0, 1, 1, 1], [0, 0, 1, 1, 1]], dtype=torch.int64) - concat_block_mask = torch.tensor([[0, 0, 1, 1, 0], [0, 0, 1, 1, 0]], dtype=torch.int64) - relevance_score = torch.tensor([0.3, 0.7], dtype=torch.float32) - - output = model( - concat_input_ids, - token_type_ids=concat_token_type_ids, - relevance_score=relevance_score, - block_mask=concat_block_mask, - return_dict=True, - ) - - block_idx_expected_shape = torch.Size(()) - start_pos_expected_shape = torch.Size((1,)) - end_pos_expected_shape = torch.Size((1,)) - self.assertEqual(output.block_idx.shape, block_idx_expected_shape) - self.assertEqual(output.start_pos.shape, start_pos_expected_shape) - self.assertEqual(output.end_pos.shape, end_pos_expected_shape) - - expected_block_idx = torch.tensor(1) - expected_start_pos = torch.tensor(3) - expected_end_pos = torch.tensor(3) - - self.assertTrue(torch.allclose(output.block_idx, expected_block_idx, atol=1e-4)) - self.assertTrue(torch.allclose(output.start_pos, expected_start_pos, atol=1e-4)) - self.assertTrue(torch.allclose(output.end_pos, expected_end_pos, atol=1e-4)) - - @slow - def test_inference_scorer(self): - num_candidates = 2 - - model = RealmScorer.from_pretrained("google/realm-cc-news-pretrained-scorer", num_candidates=num_candidates) - - input_ids = torch.tensor([[0, 1, 2, 3, 4, 5]]) - candidate_input_ids = torch.tensor([[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]]) - output = model(input_ids, candidate_input_ids=candidate_input_ids)[0] - - expected_shape = torch.Size((1, 2)) - self.assertEqual(output.shape, expected_shape) - - expected_slice = torch.tensor([[0.7410, 0.7170]]) - self.assertTrue(torch.allclose(output, expected_slice, atol=1e-4)) diff --git a/tests/models/realm/test_retrieval_realm.py b/tests/models/realm/test_retrieval_realm.py deleted file mode 100644 index ba65a6afdd..0000000000 --- a/tests/models/realm/test_retrieval_realm.py +++ /dev/null @@ -1,187 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import shutil -import tempfile -from unittest import TestCase -from unittest.mock import patch - -import numpy as np -from datasets import Dataset - -from transformers.models.realm.configuration_realm import RealmConfig -from transformers.models.realm.retrieval_realm import _REALM_BLOCK_RECORDS_FILENAME, RealmRetriever -from transformers.models.realm.tokenization_realm import VOCAB_FILES_NAMES, RealmTokenizer - - -class RealmRetrieverTest(TestCase): - def setUp(self): - self.tmpdirname = tempfile.mkdtemp() - self.num_block_records = 5 - - # Realm tok - vocab_tokens = [ - "[UNK]", - "[CLS]", - "[SEP]", - "[PAD]", - "[MASK]", - "test", - "question", - "this", - "is", - "the", - "first", - "second", - "third", - "fourth", - "fifth", - "record", - "want", - "##want", - "##ed", - "wa", - "un", - "runn", - "##ing", - ",", - "low", - "lowest", - ] - realm_tokenizer_path = os.path.join(self.tmpdirname, "realm_tokenizer") - os.makedirs(realm_tokenizer_path, exist_ok=True) - self.vocab_file = os.path.join(realm_tokenizer_path, VOCAB_FILES_NAMES["vocab_file"]) - with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer: - vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) - - realm_block_records_path = os.path.join(self.tmpdirname, "realm_block_records") - os.makedirs(realm_block_records_path, exist_ok=True) - - def get_tokenizer(self) -> RealmTokenizer: - return RealmTokenizer.from_pretrained(os.path.join(self.tmpdirname, "realm_tokenizer")) - - def tearDown(self): - shutil.rmtree(self.tmpdirname) - - def get_config(self): - config = RealmConfig(num_block_records=self.num_block_records) - return config - - def get_dummy_dataset(self): - dataset = Dataset.from_dict( - { - "id": ["0", "1"], - "question": ["foo", "bar"], - "answers": [["Foo", "Bar"], ["Bar"]], - } - ) - return dataset - - def get_dummy_block_records(self): - block_records = np.array( - [ - b"This is the first record", - b"This is the second record", - b"This is the third record", - b"This is the fourth record", - b"This is the fifth record", - b"This is a longer longer longer record", - ], - dtype=object, - ) - return block_records - - def get_dummy_retriever(self): - retriever = RealmRetriever( - block_records=self.get_dummy_block_records(), - tokenizer=self.get_tokenizer(), - ) - return retriever - - def test_retrieve(self): - config = self.get_config() - retriever = self.get_dummy_retriever() - tokenizer = retriever.tokenizer - - retrieved_block_ids = np.array([0, 3], dtype="long") - question_input_ids = tokenizer(["Test question"]).input_ids - answer_ids = tokenizer( - ["the fourth"], - add_special_tokens=False, - return_token_type_ids=False, - return_attention_mask=False, - ).input_ids - max_length = config.reader_seq_len - - has_answers, start_pos, end_pos, concat_inputs = retriever( - retrieved_block_ids, question_input_ids, answer_ids=answer_ids, max_length=max_length, return_tensors="np" - ) - - self.assertEqual(len(has_answers), 2) - self.assertEqual(len(start_pos), 2) - self.assertEqual(len(end_pos), 2) - self.assertEqual(concat_inputs.input_ids.shape, (2, 10)) - self.assertEqual(concat_inputs.attention_mask.shape, (2, 10)) - self.assertEqual(concat_inputs.token_type_ids.shape, (2, 10)) - self.assertEqual(concat_inputs.special_tokens_mask.shape, (2, 10)) - self.assertEqual( - tokenizer.convert_ids_to_tokens(concat_inputs.input_ids[0]), - ["[CLS]", "test", "question", "[SEP]", "this", "is", "the", "first", "record", "[SEP]"], - ) - self.assertEqual( - 
tokenizer.convert_ids_to_tokens(concat_inputs.input_ids[1]), - ["[CLS]", "test", "question", "[SEP]", "this", "is", "the", "fourth", "record", "[SEP]"], - ) - - def test_block_has_answer(self): - config = self.get_config() - retriever = self.get_dummy_retriever() - tokenizer = retriever.tokenizer - - retrieved_block_ids = np.array([0, 3, 5], dtype="long") - question_input_ids = tokenizer(["Test question"]).input_ids - answer_ids = tokenizer( - ["the fourth", "longer longer"], - add_special_tokens=False, - return_token_type_ids=False, - return_attention_mask=False, - ).input_ids - max_length = config.reader_seq_len - - has_answers, start_pos, end_pos, _ = retriever( - retrieved_block_ids, question_input_ids, answer_ids=answer_ids, max_length=max_length, return_tensors="np" - ) - - self.assertEqual([False, True, True], has_answers) - self.assertEqual([[-1, -1, -1], [6, -1, -1], [6, 7, 8]], start_pos) - self.assertEqual([[-1, -1, -1], [7, -1, -1], [7, 8, 9]], end_pos) - - def test_save_load_pretrained(self): - retriever = self.get_dummy_retriever() - retriever.save_pretrained(os.path.join(self.tmpdirname, "realm_block_records")) - - # Test local path - retriever = retriever.from_pretrained(os.path.join(self.tmpdirname, "realm_block_records")) - self.assertEqual(retriever.block_records[0], b"This is the first record") - - # Test mocked remote path - with patch("transformers.models.realm.retrieval_realm.hf_hub_download") as mock_hf_hub_download: - mock_hf_hub_download.return_value = os.path.join( - os.path.join(self.tmpdirname, "realm_block_records"), _REALM_BLOCK_RECORDS_FILENAME - ) - retriever = RealmRetriever.from_pretrained("google/realm-cc-news-pretrained-openqa") - - self.assertEqual(retriever.block_records[0], b"This is the first record") diff --git a/tests/models/realm/test_tokenization_realm.py b/tests/models/realm/test_tokenization_realm.py deleted file mode 100644 index 85c478837e..0000000000 --- a/tests/models/realm/test_tokenization_realm.py +++ /dev/null @@ -1,322 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import unittest - -from transformers import RealmTokenizerFast -from transformers.models.bert.tokenization_bert import ( - VOCAB_FILES_NAMES, - BasicTokenizer, - WordpieceTokenizer, - _is_control, - _is_punctuation, - _is_whitespace, -) -from transformers.models.realm.tokenization_realm import RealmTokenizer -from transformers.testing_utils import require_tokenizers, slow - -from ...test_tokenization_common import TokenizerTesterMixin, filter_non_english - - -@require_tokenizers -class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase): - from_pretrained_id = "google/realm-cc-news-pretrained-embedder" - tokenizer_class = RealmTokenizer - rust_tokenizer_class = RealmTokenizerFast - test_rust_tokenizer = True - space_between_special_tokens = True - from_pretrained_filter = filter_non_english - - def setUp(self): - super().setUp() - - vocab_tokens = [ - "[UNK]", - "[CLS]", - "[SEP]", - "[PAD]", - "[MASK]", - "want", - "##want", - "##ed", - "wa", - "un", - "runn", - "##ing", - ",", - "low", - "lowest", - ] - self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"]) - with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer: - vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) - - def get_input_output_texts(self, tokenizer): - input_text = "UNwant\u00e9d,running" - output_text = "unwanted, running" - return input_text, output_text - - def test_full_tokenizer(self): - tokenizer = self.tokenizer_class(self.vocab_file) - - tokens = tokenizer.tokenize("UNwant\u00e9d,running") - self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) - self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [9, 6, 7, 12, 10, 11]) - - def test_rust_and_python_full_tokenizers(self): - if not self.test_rust_tokenizer: - return - - tokenizer = self.get_tokenizer() - rust_tokenizer = self.get_rust_tokenizer() - - sequence = "UNwant\u00e9d,running" - - tokens = tokenizer.tokenize(sequence) - rust_tokens = rust_tokenizer.tokenize(sequence) - self.assertListEqual(tokens, rust_tokens) - - ids = tokenizer.encode(sequence, add_special_tokens=False) - rust_ids = rust_tokenizer.encode(sequence, add_special_tokens=False) - self.assertListEqual(ids, rust_ids) - - rust_tokenizer = self.get_rust_tokenizer() - ids = tokenizer.encode(sequence) - rust_ids = rust_tokenizer.encode(sequence) - self.assertListEqual(ids, rust_ids) - - # With lower casing - tokenizer = self.get_tokenizer(do_lower_case=True) - rust_tokenizer = self.get_rust_tokenizer(do_lower_case=True) - - sequence = "UNwant\u00e9d,running" - - tokens = tokenizer.tokenize(sequence) - rust_tokens = rust_tokenizer.tokenize(sequence) - self.assertListEqual(tokens, rust_tokens) - - ids = tokenizer.encode(sequence, add_special_tokens=False) - rust_ids = rust_tokenizer.encode(sequence, add_special_tokens=False) - self.assertListEqual(ids, rust_ids) - - rust_tokenizer = self.get_rust_tokenizer() - ids = tokenizer.encode(sequence) - rust_ids = rust_tokenizer.encode(sequence) - self.assertListEqual(ids, rust_ids) - - def test_chinese(self): - tokenizer = BasicTokenizer() - - self.assertListEqual(tokenizer.tokenize("ah\u535a\u63a8zz"), ["ah", "\u535a", "\u63a8", "zz"]) - - def test_basic_tokenizer_lower(self): - tokenizer = BasicTokenizer(do_lower_case=True) - - self.assertListEqual( - tokenizer.tokenize(" \tHeLLo!how \n Are yoU? 
"), ["hello", "!", "how", "are", "you", "?"] - ) - self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) - - def test_basic_tokenizer_lower_strip_accents_false(self): - tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=False) - - self.assertListEqual( - tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hällo", "!", "how", "are", "you", "?"] - ) - self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["h\u00e9llo"]) - - def test_basic_tokenizer_lower_strip_accents_true(self): - tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=True) - - self.assertListEqual( - tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] - ) - self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) - - def test_basic_tokenizer_lower_strip_accents_default(self): - tokenizer = BasicTokenizer(do_lower_case=True) - - self.assertListEqual( - tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"] - ) - self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"]) - - def test_basic_tokenizer_no_lower(self): - tokenizer = BasicTokenizer(do_lower_case=False) - - self.assertListEqual( - tokenizer.tokenize(" \tHeLLo!how \n Are yoU? "), ["HeLLo", "!", "how", "Are", "yoU", "?"] - ) - - def test_basic_tokenizer_no_lower_strip_accents_false(self): - tokenizer = BasicTokenizer(do_lower_case=False, strip_accents=False) - - self.assertListEqual( - tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["HäLLo", "!", "how", "Are", "yoU", "?"] - ) - - def test_basic_tokenizer_no_lower_strip_accents_true(self): - tokenizer = BasicTokenizer(do_lower_case=False, strip_accents=True) - - self.assertListEqual( - tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["HaLLo", "!", "how", "Are", "yoU", "?"] - ) - - def test_basic_tokenizer_respects_never_split_tokens(self): - tokenizer = BasicTokenizer(do_lower_case=False, never_split=["[UNK]"]) - - self.assertListEqual( - tokenizer.tokenize(" \tHeLLo!how \n Are yoU? 
[UNK]"), ["HeLLo", "!", "how", "Are", "yoU", "?", "[UNK]"] - ) - - def test_wordpiece_tokenizer(self): - vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", "##ing"] - - vocab = {} - for i, token in enumerate(vocab_tokens): - vocab[token] = i - tokenizer = WordpieceTokenizer(vocab=vocab, unk_token="[UNK]") - - self.assertListEqual(tokenizer.tokenize(""), []) - - self.assertListEqual(tokenizer.tokenize("unwanted running"), ["un", "##want", "##ed", "runn", "##ing"]) - - self.assertListEqual(tokenizer.tokenize("unwantedX running"), ["[UNK]", "runn", "##ing"]) - - def test_is_whitespace(self): - self.assertTrue(_is_whitespace(" ")) - self.assertTrue(_is_whitespace("\t")) - self.assertTrue(_is_whitespace("\r")) - self.assertTrue(_is_whitespace("\n")) - self.assertTrue(_is_whitespace("\u00a0")) - - self.assertFalse(_is_whitespace("A")) - self.assertFalse(_is_whitespace("-")) - - def test_is_control(self): - self.assertTrue(_is_control("\u0005")) - - self.assertFalse(_is_control("A")) - self.assertFalse(_is_control(" ")) - self.assertFalse(_is_control("\t")) - self.assertFalse(_is_control("\r")) - - def test_is_punctuation(self): - self.assertTrue(_is_punctuation("-")) - self.assertTrue(_is_punctuation("$")) - self.assertTrue(_is_punctuation("`")) - self.assertTrue(_is_punctuation(".")) - - self.assertFalse(_is_punctuation("A")) - self.assertFalse(_is_punctuation(" ")) - - def test_clean_text(self): - tokenizer = self.get_tokenizer() - - # Example taken from the issue https://github.com/huggingface/tokenizers/issues/340 - self.assertListEqual([tokenizer.tokenize(t) for t in ["Test", "\xad", "test"]], [["[UNK]"], [], ["[UNK]"]]) - - if self.test_rust_tokenizer: - rust_tokenizer = self.get_rust_tokenizer() - self.assertListEqual( - [rust_tokenizer.tokenize(t) for t in ["Test", "\xad", "test"]], [["[UNK]"], [], ["[UNK]"]] - ) - - @slow - def test_sequence_builders(self): - tokenizer = self.tokenizer_class.from_pretrained("google-bert/bert-base-uncased") - - text = tokenizer.encode("sequence builders", add_special_tokens=False) - text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False) - - encoded_sentence = tokenizer.build_inputs_with_special_tokens(text) - encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2) - - assert encoded_sentence == [101] + text + [102] - assert encoded_pair == [101] + text + [102] + text_2 + [102] - - def test_offsets_with_special_characters(self): - for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): - tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) - - sentence = f"A, naïve {tokenizer_r.mask_token} AllenNLP sentence." 
- tokens = tokenizer_r.encode_plus( - sentence, - return_attention_mask=False, - return_token_type_ids=False, - return_offsets_mapping=True, - add_special_tokens=True, - ) - - do_lower_case = tokenizer_r.do_lower_case if hasattr(tokenizer_r, "do_lower_case") else False - expected_results = ( - [ - ((0, 0), tokenizer_r.cls_token), - ((0, 1), "A"), - ((1, 2), ","), - ((3, 5), "na"), - ((5, 6), "##ï"), - ((6, 8), "##ve"), - ((9, 15), tokenizer_r.mask_token), - ((16, 21), "Allen"), - ((21, 23), "##NL"), - ((23, 24), "##P"), - ((25, 33), "sentence"), - ((33, 34), "."), - ((0, 0), tokenizer_r.sep_token), - ] - if not do_lower_case - else [ - ((0, 0), tokenizer_r.cls_token), - ((0, 1), "a"), - ((1, 2), ","), - ((3, 8), "naive"), - ((9, 15), tokenizer_r.mask_token), - ((16, 21), "allen"), - ((21, 23), "##nl"), - ((23, 24), "##p"), - ((25, 33), "sentence"), - ((33, 34), "."), - ((0, 0), tokenizer_r.sep_token), - ] - ) - - self.assertEqual( - [e[1] for e in expected_results], tokenizer_r.convert_ids_to_tokens(tokens["input_ids"]) - ) - self.assertEqual([e[0] for e in expected_results], tokens["offset_mapping"]) - - @slow - def test_batch_encode_candidates(self): - for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): - tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) - tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) - text = [["Hello world!", "Nice to meet you!"], ["The cute cat.", "The adorable dog."]] - - encoded_sentence_r = tokenizer_r.batch_encode_candidates(text, max_length=10, return_tensors="np") - encoded_sentence_p = tokenizer_p.batch_encode_candidates(text, max_length=10, return_tensors="np") - - expected_shape = (2, 2, 10) - - self.assertEqual(encoded_sentence_r["input_ids"].shape, expected_shape) - self.assertEqual(encoded_sentence_r["attention_mask"].shape, expected_shape) - self.assertEqual(encoded_sentence_r["token_type_ids"].shape, expected_shape) - - self.assertEqual(encoded_sentence_p["input_ids"].shape, expected_shape) - self.assertEqual(encoded_sentence_p["attention_mask"].shape, expected_shape) - self.assertEqual(encoded_sentence_p["token_type_ids"].shape, expected_shape) diff --git a/tests/models/speech_encoder_decoder/test_modeling_speech_encoder_decoder.py b/tests/models/speech_encoder_decoder/test_modeling_speech_encoder_decoder.py index 08c3bc6178..d7b85e7b48 100644 --- a/tests/models/speech_encoder_decoder/test_modeling_speech_encoder_decoder.py +++ b/tests/models/speech_encoder_decoder/test_modeling_speech_encoder_decoder.py @@ -23,7 +23,6 @@ from transformers.testing_utils import require_deterministic_for_xpu, require_to from ...test_modeling_common import floats_tensor, ids_tensor, random_attention_mask from ..bert.test_modeling_bert import BertModelTester from ..speech_to_text.test_modeling_speech_to_text import Speech2TextModelTester -from ..speech_to_text_2.test_modeling_speech_to_text_2 import Speech2Text2StandaloneDecoderModelTester from ..wav2vec2.test_modeling_wav2vec2 import Wav2Vec2ModelTester @@ -33,7 +32,6 @@ if is_torch_available(): from transformers import ( BertLMHeadModel, - Speech2Text2ForCausalLM, SpeechEncoderDecoderConfig, SpeechEncoderDecoderModel, Wav2Vec2Model, @@ -583,43 +581,3 @@ class Speech2TextBertModelTest(EncoderDecoderMixin, unittest.TestCase): # all published pretrained models are Speech2TextModel != Speech2TextEncoder def test_real_model_save_load_from_pretrained(self): pass - - 
-@require_torch -class Wav2Vec2Speech2Text2(EncoderDecoderMixin, unittest.TestCase): - def get_encoder_decoder_model(self, config, decoder_config): - encoder_model = Wav2Vec2Model(config).eval() - decoder_model = Speech2Text2ForCausalLM(decoder_config).eval() - return encoder_model, decoder_model - - def prepare_config_and_inputs(self): - model_tester_encoder = Wav2Vec2ModelTester(self, batch_size=13) - model_tester_decoder = Speech2Text2StandaloneDecoderModelTester( - self, batch_size=13, d_model=32, max_position_embeddings=512 - ) - encoder_config_and_inputs = model_tester_encoder.prepare_config_and_inputs() - decoder_config_and_inputs = model_tester_decoder.prepare_config_and_inputs() - ( - config, - input_values, - input_mask, - ) = encoder_config_and_inputs - (decoder_config, decoder_input_ids, decoder_attention_mask, _) = decoder_config_and_inputs - - # make sure that cross attention layers are added - decoder_config.add_cross_attention = True - # disable cache for now - decoder_config.use_cache = False - return { - "config": config, - "input_values": input_values, - "attention_mask": input_mask, - "decoder_config": decoder_config, - "decoder_input_ids": decoder_input_ids, - "decoder_attention_mask": decoder_attention_mask, - "labels": decoder_input_ids, - } - - # there are no published pretrained Speech2Text2ForCausalLM for now - def test_real_model_save_load_from_pretrained(self): - pass diff --git a/tests/models/speech_to_text_2/__init__.py b/tests/models/speech_to_text_2/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/speech_to_text_2/test_modeling_speech_to_text_2.py b/tests/models/speech_to_text_2/test_modeling_speech_to_text_2.py deleted file mode 100644 index fffa16aa30..0000000000 --- a/tests/models/speech_to_text_2/test_modeling_speech_to_text_2.py +++ /dev/null @@ -1,216 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Testing suite for the PyTorch Speech2Text model.""" - -import unittest - -from transformers import Speech2Text2Config -from transformers.testing_utils import is_torch_available, require_torch, torch_device - -from ...generation.test_utils import GenerationTesterMixin -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, ids_tensor -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_torch_available(): - import torch - - from transformers.models.speech_to_text_2.modeling_speech_to_text_2 import ( - Speech2Text2Decoder, - Speech2Text2ForCausalLM, - ) - - -@require_torch -class Speech2Text2StandaloneDecoderModelTester: - def __init__( - self, - parent, - vocab_size=99, - batch_size=13, - d_model=16, - decoder_seq_length=7, - is_training=True, - is_decoder=True, - use_attention_mask=True, - use_cache=False, - use_labels=True, - decoder_start_token_id=2, - decoder_ffn_dim=32, - decoder_layers=2, - decoder_attention_heads=4, - max_position_embeddings=30, - pad_token_id=0, - bos_token_id=1, - eos_token_id=2, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.decoder_seq_length = decoder_seq_length - # For common tests - self.seq_length = self.decoder_seq_length - self.is_training = is_training - self.use_attention_mask = use_attention_mask - self.use_labels = use_labels - - self.vocab_size = vocab_size - self.d_model = d_model - self.hidden_size = d_model - self.num_hidden_layers = decoder_layers - self.decoder_layers = decoder_layers - self.decoder_ffn_dim = decoder_ffn_dim - self.decoder_attention_heads = decoder_attention_heads - self.num_attention_heads = decoder_attention_heads - self.eos_token_id = eos_token_id - self.bos_token_id = bos_token_id - self.pad_token_id = pad_token_id - self.decoder_start_token_id = decoder_start_token_id - self.use_cache = use_cache - self.max_position_embeddings = max_position_embeddings - - self.scope = None - self.decoder_key_length = decoder_seq_length - self.base_model_out_len = 2 - self.decoder_attention_idx = 1 - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size) - - attention_mask = None - if self.use_attention_mask: - attention_mask = ids_tensor([self.batch_size, self.decoder_seq_length], vocab_size=2) - - lm_labels = None - if self.use_labels: - lm_labels = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size) - - config = Speech2Text2Config( - vocab_size=self.vocab_size, - d_model=self.d_model, - decoder_layers=self.decoder_layers, - decoder_ffn_dim=self.decoder_ffn_dim, - decoder_attention_heads=self.decoder_attention_heads, - eos_token_id=self.eos_token_id, - bos_token_id=self.bos_token_id, - use_cache=self.use_cache, - pad_token_id=self.pad_token_id, - decoder_start_token_id=self.decoder_start_token_id, - max_position_embeddings=self.max_position_embeddings, - ) - - return ( - config, - input_ids, - attention_mask, - lm_labels, - ) - - def create_and_check_decoder_model_past( - self, - config, - input_ids, - attention_mask, - lm_labels, - ): - config.use_cache = True - model = Speech2Text2Decoder(config=config).to(torch_device).eval() - input_ids = input_ids[:2] - - input_ids[input_ids == 0] += 1 - # first forward pass - outputs = model(input_ids, use_cache=True) - outputs_use_cache_conf = model(input_ids) - outputs_no_past = model(input_ids, use_cache=False) - - self.parent.assertTrue(len(outputs) == len(outputs_use_cache_conf)) - 
self.parent.assertTrue(len(outputs) == len(outputs_no_past) + 1) - - past_key_values = outputs["past_key_values"] - - # create hypothetical next token and extent to next_input_ids - next_tokens = ids_tensor((2, 1), config.vocab_size - 1) + 1 - - # append to next input_ids and - next_input_ids = torch.cat([input_ids, next_tokens], dim=-1) - - print(next_input_ids) - - output_from_no_past = model(next_input_ids)["last_hidden_state"] - output_from_past = model(next_tokens, past_key_values=past_key_values)["last_hidden_state"] - - # select random slice - random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() - output_from_no_past_slice = output_from_no_past[:, next_input_ids.shape[-1] - 1, random_slice_idx].detach() - output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach() - - # test that outputs are equal for slice - assert torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - attention_mask, - lm_labels, - ) = config_and_inputs - - inputs_dict = { - "input_ids": input_ids, - "attention_mask": attention_mask, - } - return config, inputs_dict - - -@require_torch -class Speech2Text2StandaloneDecoderModelTest( - ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase -): - all_model_classes = (Speech2Text2Decoder, Speech2Text2ForCausalLM) if is_torch_available() else () - all_generative_model_classes = (Speech2Text2ForCausalLM,) if is_torch_available() else () - pipeline_model_mapping = {"text-generation": Speech2Text2ForCausalLM} if is_torch_available() else {} - fx_compatible = True - test_pruning = False - - def setUp( - self, - ): - self.model_tester = Speech2Text2StandaloneDecoderModelTester(self, is_training=False) - self.config_tester = ConfigTester(self, config_class=Speech2Text2Config) - - # not implemented currently - def test_inputs_embeds(self): - pass - - # speech2text2 has no base model - def test_save_load_fast_init_from_base(self): - pass - - # speech2text2 has no base model - def test_save_load_fast_init_to_base(self): - pass - - def test_config(self): - self.config_tester.run_common_tests() - - def test_decoder_model_past(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_decoder_model_past(*config_and_inputs) - - # decoder cannot keep gradients - def test_retain_grad_hidden_states_attentions(self): - return diff --git a/tests/models/speech_to_text_2/test_tokenization_speech_to_text_2.py b/tests/models/speech_to_text_2/test_tokenization_speech_to_text_2.py deleted file mode 100644 index df433d67d9..0000000000 --- a/tests/models/speech_to_text_2/test_tokenization_speech_to_text_2.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import inspect -import json -import os -import tempfile -import unittest - -from transformers.models.speech_to_text_2 import Speech2Text2Tokenizer -from transformers.models.speech_to_text_2.tokenization_speech_to_text_2 import VOCAB_FILES_NAMES - -from ...test_tokenization_common import TokenizerTesterMixin - - -class SpeechToTextTokenizerTest(TokenizerTesterMixin, unittest.TestCase): - from_pretrained_id = "facebook/s2t-wav2vec2-large-en-de" - tokenizer_class = Speech2Text2Tokenizer - test_rust_tokenizer = False - - def setUp(self): - super().setUp() - - vocab = " here@@ a couple of@@ words for the he@@ re@@ vocab".split(" ") - merges = ["he re 123", "here a 1456"] - vocab_tokens = dict(zip(vocab, range(len(vocab)))) - - self.special_tokens_map = {"pad_token": "", "unk_token": "", "bos_token": "", "eos_token": ""} - - self.tmpdirname = tempfile.mkdtemp() - self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"]) - self.merges_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["merges_file"]) - - with open(self.vocab_file, "w", encoding="utf-8") as fp: - fp.write(json.dumps(vocab_tokens) + "\n") - - with open(self.merges_file, "w") as fp: - fp.write("\n".join(merges)) - - def test_get_vocab(self): - vocab_keys = list(self.get_tokenizer().get_vocab().keys()) - - self.assertEqual(vocab_keys[0], "") - self.assertEqual(vocab_keys[1], "") - self.assertEqual(vocab_keys[-1], "vocab") - self.assertEqual(len(vocab_keys), 14) - - def test_vocab_size(self): - self.assertEqual(self.get_tokenizer().vocab_size, 14) - - def test_tokenizer_decode(self): - tokenizer = Speech2Text2Tokenizer.from_pretrained(self.tmpdirname) - - # make sure @@ is correctly concatenated - token_ids = [4, 6, 8, 7, 10] # ["here@@", "couple", "words", "of@@", "the"] - output_string = tokenizer.decode(token_ids) - - self.assertTrue(output_string == "herecouple words ofthe") - - def test_load_no_merges_file(self): - tokenizer = Speech2Text2Tokenizer.from_pretrained(self.tmpdirname) - - with tempfile.TemporaryDirectory() as tmp_dirname: - tokenizer.save_pretrained(tmp_dirname) - os.remove(os.path.join(tmp_dirname, "merges.txt")) - - # load tokenizer without merges file should not throw an error - tokenizer = Speech2Text2Tokenizer.from_pretrained(tmp_dirname) - - with tempfile.TemporaryDirectory() as tmp_dirname: - # save tokenizer and load again - tokenizer.save_pretrained(tmp_dirname) - tokenizer = Speech2Text2Tokenizer.from_pretrained(tmp_dirname) - - self.assertIsNotNone(tokenizer) - - # overwrite since merges_file is optional - def test_tokenizer_slow_store_full_signature(self): - if not self.test_slow_tokenizer: - return - - signature = inspect.signature(self.tokenizer_class.__init__) - tokenizer = self.get_tokenizer() - - for parameter_name, parameter in signature.parameters.items(): - if parameter.default != inspect.Parameter.empty and parameter_name != "merges_file": - self.assertIn(parameter_name, tokenizer.init_kwargs) diff --git a/tests/models/tvlt/__init__.py b/tests/models/tvlt/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/tvlt/test_feature_extraction_tvlt.py b/tests/models/tvlt/test_feature_extraction_tvlt.py deleted file mode 100644 index a0b3f7a916..0000000000 --- a/tests/models/tvlt/test_feature_extraction_tvlt.py +++ /dev/null @@ -1,182 +0,0 @@ -# coding=utf-8 -# Copyright 2023 HuggingFace Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Testing suite for the TVLT feature extraction.""" - -import itertools -import random -import unittest - -import numpy as np - -from transformers import TvltFeatureExtractor, is_datasets_available -from transformers.testing_utils import require_torch, require_torchaudio -from transformers.utils.import_utils import is_torch_available - -from ...test_sequence_feature_extraction_common import SequenceFeatureExtractionTestMixin - - -if is_torch_available(): - import torch - -if is_datasets_available(): - from datasets import load_dataset - -global_rng = random.Random() - - -# Copied from tests.models.whisper.test_feature_extraction_whisper.floats_list -def floats_list(shape, scale=1.0, rng=None, name=None): - """Creates a random float32 tensor""" - if rng is None: - rng = global_rng - - values = [] - for batch_idx in range(shape[0]): - values.append([]) - for _ in range(shape[1]): - values[-1].append(rng.random() * scale) - - return values - - -class TvltFeatureExtractionTester(unittest.TestCase): - def __init__( - self, - parent, - batch_size=7, - min_seq_length=400, - max_seq_length=2000, - spectrogram_length=2048, - feature_size=128, - num_audio_channels=1, - hop_length=512, - chunk_length=30, - sampling_rate=44100, - ): - self.parent = parent - self.batch_size = batch_size - self.min_seq_length = min_seq_length - self.max_seq_length = max_seq_length - self.seq_length_diff = (self.max_seq_length - self.min_seq_length) // (self.batch_size - 1) - self.spectrogram_length = spectrogram_length - self.feature_size = feature_size - self.num_audio_channels = num_audio_channels - self.hop_length = hop_length - self.chunk_length = chunk_length - self.sampling_rate = sampling_rate - - def prepare_feat_extract_dict(self): - return { - "spectrogram_length": self.spectrogram_length, - "feature_size": self.feature_size, - "num_audio_channels": self.num_audio_channels, - "hop_length": self.hop_length, - "chunk_length": self.chunk_length, - "sampling_rate": self.sampling_rate, - } - - def prepare_inputs_for_common(self, equal_length=False, numpify=False): - def _flatten(list_of_lists): - return list(itertools.chain(*list_of_lists)) - - if equal_length: - speech_inputs = [floats_list((self.max_seq_length, self.feature_size)) for _ in range(self.batch_size)] - else: - # make sure that inputs increase in size - speech_inputs = [ - floats_list((x, self.feature_size)) - for x in range(self.min_seq_length, self.max_seq_length, self.seq_length_diff) - ] - if numpify: - speech_inputs = [np.asarray(x) for x in speech_inputs] - return speech_inputs - - -@require_torch -@require_torchaudio -class TvltFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.TestCase): - feature_extraction_class = TvltFeatureExtractor - - def setUp(self): - self.feat_extract_tester = TvltFeatureExtractionTester(self) - - def test_feat_extract_properties(self): - feature_extractor = self.feature_extraction_class(**self.feat_extract_dict) - self.assertTrue(hasattr(feature_extractor, "spectrogram_length")) - self.assertTrue(hasattr(feature_extractor, "feature_size")) - self.assertTrue(hasattr(feature_extractor, 
"num_audio_channels")) - self.assertTrue(hasattr(feature_extractor, "hop_length")) - self.assertTrue(hasattr(feature_extractor, "chunk_length")) - self.assertTrue(hasattr(feature_extractor, "sampling_rate")) - - def test_call(self): - # Initialize feature_extractor - feature_extractor = self.feature_extraction_class(**self.feat_extract_dict) - - # create three inputs of length 800, 1000, and 1200 - speech_inputs = [floats_list((1, x))[0] for x in range(800, 1400, 200)] - np_speech_inputs = [np.asarray(speech_input) for speech_input in speech_inputs] - - # Test not batched input - encoded_audios = feature_extractor(np_speech_inputs[0], return_tensors="np", sampling_rate=44100).audio_values - - self.assertTrue(encoded_audios.ndim == 4) - self.assertTrue(encoded_audios.shape[-1] == feature_extractor.feature_size) - self.assertTrue(encoded_audios.shape[-2] <= feature_extractor.spectrogram_length) - self.assertTrue(encoded_audios.shape[-3] == feature_extractor.num_channels) - - # Test batched - encoded_audios = feature_extractor(np_speech_inputs, return_tensors="np", sampling_rate=44100).audio_values - - self.assertTrue(encoded_audios.ndim == 4) - self.assertTrue(encoded_audios.shape[-1] == feature_extractor.feature_size) - self.assertTrue(encoded_audios.shape[-2] <= feature_extractor.spectrogram_length) - self.assertTrue(encoded_audios.shape[-3] == feature_extractor.num_channels) - - # Test audio masking - encoded_audios = feature_extractor( - np_speech_inputs, return_tensors="np", sampling_rate=44100, mask_audio=True - ).audio_values - - self.assertTrue(encoded_audios.ndim == 4) - self.assertTrue(encoded_audios.shape[-1] == feature_extractor.feature_size) - self.assertTrue(encoded_audios.shape[-2] <= feature_extractor.spectrogram_length) - self.assertTrue(encoded_audios.shape[-3] == feature_extractor.num_channels) - - # Test 2-D numpy arrays are batched. - speech_inputs = [floats_list((1, x))[0] for x in (800, 800, 800)] - np_speech_inputs = np.asarray(speech_inputs) - encoded_audios = feature_extractor(np_speech_inputs, return_tensors="np", sampling_rate=44100).audio_values - self.assertTrue(encoded_audios.ndim == 4) - self.assertTrue(encoded_audios.shape[-1] == feature_extractor.feature_size) - self.assertTrue(encoded_audios.shape[-2] <= feature_extractor.spectrogram_length) - self.assertTrue(encoded_audios.shape[-3] == feature_extractor.num_channels) - - def _load_datasamples(self, num_samples): - ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") - # automatic decoding with librispeech - speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] - - return [x["array"] for x in speech_samples] - - def test_integration(self): - input_speech = self._load_datasamples(1) - feature_extractor = TvltFeatureExtractor() - audio_values = feature_extractor(input_speech, return_tensors="pt").audio_values - - self.assertEqual(audio_values.shape, (1, 1, 192, 128)) - - expected_slice = torch.tensor([[-0.3032, -0.2708], [-0.4434, -0.4007]]) - self.assertTrue(torch.allclose(audio_values[0, 0, :2, :2], expected_slice, atol=1e-4)) diff --git a/tests/models/tvlt/test_image_processor_tvlt.py b/tests/models/tvlt/test_image_processor_tvlt.py deleted file mode 100644 index c2974da6d8..0000000000 --- a/tests/models/tvlt/test_image_processor_tvlt.py +++ /dev/null @@ -1,294 +0,0 @@ -# coding=utf-8 -# Copyright 2023 HuggingFace Inc. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Testing suite for the TVLT image processor.""" - -import unittest - -import numpy as np - -from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available - -from ...test_image_processing_common import ImageProcessingTestMixin - - -if is_torch_available(): - import torch - -if is_vision_available(): - from PIL import Image - - from transformers import TvltImageProcessor - - -def prepare_video(image_processor_tester, width=10, height=10, numpify=False, torchify=False): - """This function prepares a video as a list of PIL images/NumPy arrays/PyTorch tensors.""" - - video = [] - for i in range(image_processor_tester.num_frames): - video.append(np.random.randint(255, size=(image_processor_tester.num_channels, width, height), dtype=np.uint8)) - - if not numpify and not torchify: - # PIL expects the channel dimension as last dimension - video = [Image.fromarray(np.moveaxis(frame, 0, -1)) for frame in video] - - if torchify: - video = [torch.from_numpy(frame) for frame in video] - - return video - - -def prepare_video_inputs(image_processor_tester, equal_resolution=False, numpify=False, torchify=False): - """This function prepares a batch of videos: a list of list of PIL images, or a list of list of numpy arrays if - one specifies numpify=True, or a list of list of PyTorch tensors if one specifies torchify=True. - One can specify whether the videos are of the same resolution or not. 
- """ - - assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time" - - video_inputs = [] - for i in range(image_processor_tester.batch_size): - if equal_resolution: - width = height = image_processor_tester.max_resolution - else: - width, height = np.random.choice( - np.arange(image_processor_tester.min_resolution, image_processor_tester.max_resolution), 2 - ) - video = prepare_video( - image_processor_tester=image_processor_tester, - width=width, - height=height, - numpify=numpify, - torchify=torchify, - ) - video_inputs.append(video) - - return video_inputs - - -class TvltImageProcessorTester(unittest.TestCase): - def __init__( - self, - parent, - batch_size=7, - num_channels=3, - num_frames=4, - image_size=18, - min_resolution=30, - max_resolution=400, - do_resize=True, - size=None, - do_normalize=True, - image_mean=[0.5, 0.5, 0.5], - image_std=[0.5, 0.5, 0.5], - do_center_crop=True, - crop_size=None, - ): - size = size if size is not None else {"shortest_edge": 18} - crop_size = crop_size if crop_size is not None else {"height": 18, "width": 18} - - self.parent = parent - self.batch_size = batch_size - self.num_channels = num_channels - self.num_frames = num_frames - self.image_size = image_size - self.min_resolution = min_resolution - self.max_resolution = max_resolution - self.do_resize = do_resize - self.size = size - self.do_normalize = do_normalize - self.image_mean = image_mean - self.image_std = image_std - self.do_center_crop = do_center_crop - self.crop_size = crop_size - - def prepare_image_processor_dict(self): - return { - "image_mean": self.image_mean, - "image_std": self.image_std, - "do_normalize": self.do_normalize, - "do_resize": self.do_resize, - "size": self.size, - "do_center_crop": self.do_center_crop, - "crop_size": self.crop_size, - } - - -@require_torch -@require_vision -class TvltImageProcessorTest(ImageProcessingTestMixin, unittest.TestCase): - image_processing_class = TvltImageProcessor if is_vision_available() else None - - def setUp(self): - self.image_processor_tester = TvltImageProcessorTester(self) - - @property - def image_processor_dict(self): - return self.image_processor_tester.prepare_image_processor_dict() - - def test_image_processor_properties(self): - image_processor = self.image_processing_class(**self.image_processor_dict) - self.assertTrue(hasattr(image_processor, "image_mean")) - self.assertTrue(hasattr(image_processor, "image_std")) - self.assertTrue(hasattr(image_processor, "do_normalize")) - self.assertTrue(hasattr(image_processor, "do_resize")) - self.assertTrue(hasattr(image_processor, "do_center_crop")) - self.assertTrue(hasattr(image_processor, "size")) - - def test_call_pil(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) - # create random PIL videos - video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False) - for video in video_inputs: - self.assertIsInstance(video, list) - self.assertIsInstance(video[0], Image.Image) - - # Test not batched input - encoded_videos = image_processor(video_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - 1, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_videos = image_processor(video_inputs, return_tensors="pt").pixel_values - 
self.assertEqual( - encoded_videos.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for video in video_inputs: - self.assertIsInstance(video, list) - self.assertIsInstance(video[0], np.ndarray) - - # Test not batched input - encoded_videos = image_processor(video_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - 1, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_videos = image_processor(video_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_numpy_4_channels(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - self.image_processor_tester.num_channels = 4 - video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for video in video_inputs: - self.assertIsInstance(video, list) - self.assertIsInstance(video[0], np.ndarray) - - # Test not batched input - encoded_videos = image_processor( - video_inputs[0], return_tensors="pt", input_data_format="channels_first", image_mean=0, image_std=1 - ).pixel_values - self.assertEqual( - encoded_videos.shape, - ( - 1, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_videos = image_processor( - video_inputs, return_tensors="pt", input_data_format="channels_first", image_mean=0, image_std=1 - ).pixel_values - self.assertEqual( - encoded_videos.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - self.image_processor_tester.num_channels = 3 - - def test_call_pytorch(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for video in video_inputs: - self.assertIsInstance(video, list) - self.assertIsInstance(video[0], torch.Tensor) - - # Test not batched input - encoded_videos = image_processor(video_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - 1, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - 
self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_videos = image_processor(video_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) diff --git a/tests/models/tvlt/test_modeling_tvlt.py b/tests/models/tvlt/test_modeling_tvlt.py deleted file mode 100644 index 574559a7a2..0000000000 --- a/tests/models/tvlt/test_modeling_tvlt.py +++ /dev/null @@ -1,625 +0,0 @@ -# coding=utf-8 -# Copyright 2023 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Testing suite for the PyTorch TVLT model.""" - -import copy -import inspect -import unittest - -import numpy as np -from huggingface_hub import hf_hub_download - -from transformers import ( - TvltConfig, - is_datasets_available, - is_speech_available, - is_torch_available, - is_vision_available, -) -from transformers.testing_utils import require_torch, require_vision, slow, torch_device -from transformers.utils import cached_property - -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, floats_tensor -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_torch_available(): - import torch - import torch.nn as nn - - from transformers import TvltForAudioVisualClassification, TvltForPreTraining, TvltModel - - -if is_datasets_available(): - from datasets import load_dataset - -if is_vision_available(): - from transformers import TvltImageProcessor - -if is_speech_available(): - from transformers import TvltFeatureExtractor - - -class TvltModelTester: - def __init__( - self, - parent, - batch_size=2, - image_size=32, - spectrogram_length=32, - frequency_length=16, - image_patch_size=[2, 2], - audio_patch_size=[2, 2], - num_image_channels=3, - num_audio_channels=1, - num_frames=2, - hidden_size=32, - num_hidden_layers=2, - num_attention_heads=4, - intermediate_size=128, - hidden_act="gelu", - hidden_dropout_prob=0.0, - attention_probs_dropout_prob=0.0, - initializer_range=0.02, - layer_norm_eps=1e-12, - qkv_bias=True, - use_mean_pooling=True, - decoder_num_attention_heads=4, - decoder_hidden_size=32, - decoder_num_hidden_layers=2, - decoder_intermediate_size=128, - image_mask_ratio=0.75, - audio_mask_ratio=0.15, - audio_mask_type="frame-level", - task_matching=True, - task_mae=True, - num_labels=1, - is_training=True, - ): - self.parent = parent - self.batch_size = batch_size - self.image_size = image_size - self.spectrogram_length = spectrogram_length - self.frequency_length = frequency_length - self.image_patch_size = image_patch_size - self.audio_patch_size = audio_patch_size - self.num_image_channels = num_image_channels - self.num_audio_channels = num_audio_channels - self.num_frames = num_frames - - self.hidden_size = hidden_size - 
self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.initializer_range = initializer_range - self.layer_norm_eps = layer_norm_eps - self.qkv_bias = qkv_bias - self.use_mean_pooling = use_mean_pooling - - self.decoder_num_attention_heads = decoder_num_attention_heads - self.decoder_hidden_size = decoder_hidden_size - self.decoder_num_hidden_layers = decoder_num_hidden_layers - self.decoder_intermediate_size = decoder_intermediate_size - self.image_mask_ratio = image_mask_ratio - self.audio_mask_ratio = audio_mask_ratio - - self.task_matching = task_matching - self.task_mae = task_mae - self.num_labels = num_labels - - self.expected_pixel_seq_len = (self.image_size // self.image_patch_size[0]) ** 2 * self.num_frames - self.expected_audio_seq_len = (self.spectrogram_length // self.audio_patch_size[0]) * ( - self.frequency_length // self.audio_patch_size[1] - ) - # we set the expected sequence length (which is used in several tests) - # this is equal to the seq length of number of image/video patches + number of audio patches - self.expected_seq_len = self.expected_pixel_seq_len + self.expected_audio_seq_len + 1 - - self.image_mae_output_dim = image_patch_size[0] ** 2 * num_image_channels - self.audio_mae_output_dim = audio_patch_size[0] * audio_patch_size[1] * num_audio_channels - self.is_training = is_training - - def prepare_config_and_inputs(self): - pixel_values = floats_tensor( - [self.batch_size, self.num_frames, self.num_image_channels, self.image_size, self.image_size] - ) - audio_values = floats_tensor( - [self.batch_size, self.num_audio_channels, self.spectrogram_length, self.frequency_length] - ) - - pixel_mask = floats_tensor([self.batch_size, self.expected_pixel_seq_len]) - audio_mask = floats_tensor([self.batch_size, self.expected_audio_seq_len]) - - config = self.get_config() - - return (config, pixel_values, audio_values, pixel_mask, audio_mask) - - def prepare_config_and_inputs_for_pretraining(self): - pixel_values = floats_tensor( - [self.batch_size, self.num_frames, self.num_image_channels, self.image_size, self.image_size] - ) - audio_values = floats_tensor( - [self.batch_size, self.num_audio_channels, self.spectrogram_length, self.frequency_length] - ) - - pixel_mask = floats_tensor([self.batch_size, self.expected_pixel_seq_len]) - audio_mask = floats_tensor([self.batch_size, self.expected_audio_seq_len]) - - pixel_values_mixed = floats_tensor( - [self.batch_size, self.num_frames, self.num_image_channels, self.image_size, self.image_size] - ) - pixel_mask_mixed = floats_tensor([self.batch_size, self.expected_pixel_seq_len]) - labels = floats_tensor([self.batch_size]) - config = self.get_config() - - return ( - config, - pixel_values, - audio_values, - pixel_mask, - audio_mask, - pixel_values_mixed, - pixel_mask_mixed, - labels, - ) - - def get_config(self): - return TvltConfig( - image_size=self.image_size, - spectrogram_length=self.spectrogram_length, - frequency_length=self.frequency_length, - image_patch_size=self.image_patch_size, - audio_patch_size=self.audio_patch_size, - num_image_channels=self.num_image_channels, - num_audio_channels=self.num_audio_channels, - num_frames=self.num_frames, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - 
intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - initializer_range=self.initializer_range, - layer_norm_eps=self.layer_norm_eps, - qkv_bias=self.qkv_bias, - use_mean_pooling=self.use_mean_pooling, - decoder_num_attention_heads=self.decoder_num_attention_heads, - decoder_hidden_size=self.decoder_hidden_size, - decoder_num_hidden_layers=self.decoder_num_hidden_layers, - decoder_intermediate_size=self.decoder_intermediate_size, - image_mask_ratio=self.image_mask_ratio, - audio_mask_ratio=self.audio_mask_ratio, - task_matching=self.task_matching, - task_mae=self.task_mae, - num_labels=self.num_labels, - ) - - def create_and_check_model(self, config, pixel_values, audio_values, pixel_mask, audio_mask): - model = TvltModel(config=config) - model.to(torch_device) - model.eval() - result = model(pixel_values, audio_values, pixel_mask=pixel_mask, audio_mask=audio_mask) - result = model(pixel_values, audio_values) - self.parent.assertEqual( - result.last_hidden_state.shape, (self.batch_size, self.expected_seq_len, self.hidden_size) - ) - - def create_and_check_for_audiovisual_classification( - self, config, pixel_values, audio_values, pixel_mask, audio_mask - ): - model = TvltForAudioVisualClassification(config=config) - model.to(torch_device) - model.eval() - result = model(pixel_values, audio_values, pixel_mask=pixel_mask, audio_mask=audio_mask) - result = model(pixel_values, audio_values) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels)) - - def create_and_check_for_pretraining( - self, - config, - pixel_values, - audio_values, - pixel_mask, - audio_mask, - pixel_values_mixed, - pixel_mask_mixed, - labels, - ): - model = TvltForPreTraining(config=config) - model.to(torch_device) - model.train() - result = model( - pixel_values, - audio_values, - pixel_mask, - audio_mask, - pixel_values_mixed=pixel_values_mixed, - pixel_mask_mixed=pixel_mask_mixed, - labels=labels, - ) - self.parent.assertEqual( - result.pixel_logits.shape, (self.batch_size, self.expected_pixel_seq_len, self.image_mae_output_dim) - ) - self.parent.assertEqual( - result.audio_logits.shape, (self.batch_size, self.expected_audio_seq_len, self.audio_mae_output_dim) - ) - self.parent.assertEqual(result.matching_logits.shape, (self.batch_size, self.num_labels)) - - def create_and_check_for_pretraining_inference( - self, - config, - pixel_values, - audio_values, - pixel_mask, - audio_mask, - pixel_values_mixed, - pixel_mask_mixed, - labels, - ): - model = TvltForPreTraining(config=config) - model.to(torch_device) - model.eval() - result = model( - pixel_values, - audio_values, - pixel_mask, - audio_mask, - pixel_values_mixed=pixel_values_mixed, - pixel_mask_mixed=pixel_mask_mixed, - labels=labels, - ) - if result.pixel_logits is not None: - self.parent.assertEqual( - result.pixel_logits.shape, (self.batch_size, self.expected_pixel_seq_len, self.image_mae_output_dim) - ) - if result.audio_logits is not None: - self.parent.assertEqual( - result.audio_logits.shape, (self.batch_size, self.expected_audio_seq_len, self.audio_mae_output_dim) - ) - self.parent.assertEqual(result.matching_logits.shape, (self.batch_size, self.num_labels)) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - (config, pixel_values, audio_values, pixel_mask, audio_mask) = config_and_inputs - inputs_dict = { - "pixel_values": pixel_values, - 
"audio_values": audio_values, - "pixel_mask": pixel_mask, - "audio_mask": audio_mask, - } - return config, inputs_dict - - def prepare_pixel_values(self): - return floats_tensor( - [self.batch_size, self.num_frames, self.num_image_channels, self.image_size, self.image_size] - ) - - def prepare_audio_values(self): - return floats_tensor( - [self.batch_size, self.num_audio_channels, self.spectrogram_length, self.frequency_length] - ) - - -@require_torch -class TvltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): - all_model_classes = ( - (TvltModel, TvltForPreTraining, TvltForAudioVisualClassification) if is_torch_available() else () - ) - pipeline_model_mapping = {"feature-extraction": TvltModel} if is_torch_available() else {} - - fx_compatible = False - test_pruning = False - test_headmasking = False - test_torchscript = False - test_resize_embeddings = False - main_input_name = "pixel_values" - - # TvltForAudioVisualClassification and TvltForPreTraining require special treatment - def _prepare_for_class(self, inputs_dict, model_class, return_labels=True): - inputs_dict = copy.deepcopy(inputs_dict) - - if return_labels: - if model_class.__name__ == "TvltForAudioVisualClassification": - inputs_dict["labels"] = torch.zeros( - (self.model_tester.batch_size,), dtype=torch.long, device=torch_device - ) - elif model_class.__name__ == "TvltForPreTraining": - inputs_dict["labels"] = torch.zeros( - (self.model_tester.batch_size,), dtype=torch.float, device=torch_device - ) - inputs_dict["pixel_values_mixed"] = torch.zeros( - ( - self.model_tester.batch_size, - self.model_tester.num_frames, - self.model_tester.num_image_channels, - self.model_tester.image_size, - self.model_tester.image_size, - ), - dtype=torch.float, - device=torch_device, - ) - inputs_dict["pixel_mask_mixed"] = torch.zeros( - (self.model_tester.batch_size, self.model_tester.expected_pixel_seq_len), - dtype=torch.float, - device=torch_device, - ) - - return inputs_dict - - def setUp(self): - self.model_tester = TvltModelTester(self) - self.config_tester = ConfigTester(self, config_class=TvltConfig, has_text_modality=False, hidden_size=37) - - def test_config(self): - self.config_tester.run_common_tests() - - @unittest.skip(reason="TVLT does not use inputs_embeds") - def test_inputs_embeds(self): - pass - - def test_model_common_attributes(self): - config, _ = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - model = model_class(config) - input_embeddings = model.get_input_embeddings() - self.assertIsInstance(input_embeddings, (tuple)) - for embedding in input_embeddings: - self.assertIsInstance(embedding, (nn.Module)) - x = model.get_output_embeddings() - self.assertTrue(x is None or isinstance(x, nn.Linear)) - - def test_forward_signature(self): - config, _ = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - model = model_class(config) - signature = inspect.signature(model.forward) - # signature.parameters is an OrderedDict => so arg_names order is deterministic - arg_names = [*signature.parameters.keys()] - - expected_arg_names = ["pixel_values", "audio_values"] - self.assertListEqual(arg_names[:2], expected_arg_names) - - def test_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_model(*config_and_inputs) - - def test_for_audiovisual_classification(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - 
self.model_tester.create_and_check_for_audiovisual_classification(*config_and_inputs) - - def test_for_pretraining(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs_for_pretraining() - self.model_tester.create_and_check_for_pretraining(*config_and_inputs) - self.model_tester.create_and_check_for_pretraining_inference(*config_and_inputs) - - @slow - def test_model_from_pretrained(self): - model_name = "ZinengTang/tvlt-base" - model = TvltModel.from_pretrained(model_name) - self.assertIsNotNone(model) - - def test_training(self): - if not self.model_tester.is_training: - return - - for model_class in self.all_model_classes[1:]: - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True - - model = model_class(config) - model.to(torch_device) - model.train() - inputs = self._prepare_for_class(inputs_dict, model_class) - for k, v in inputs.items(): - print(k, v.shape) - loss = model(**inputs).loss - loss.backward() - - def test_training_gradient_checkpointing(self): - if not self.model_tester.is_training: - return - - for model_class in self.all_model_classes[1:]: - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.use_cache = False - config.return_dict = True - - model = model_class(config) - model.to(torch_device) - model.gradient_checkpointing_enable() - model.train() - inputs = self._prepare_for_class(inputs_dict, model_class) - loss = model(**inputs).loss - loss.backward() - - def test_attention_outputs(self): - if not self.has_attentions: - pass - - else: - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True - - for model_class in self.all_model_classes[2:]: - seq_len = self.model_tester.expected_seq_len - - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = False - config.return_dict = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - # check that output_attentions also work using config - del inputs_dict["output_attentions"] - config.output_attentions = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - self.assertListEqual( - list(attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - out_len = len(outputs) - - # Check attention is always last and order is fine - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - self.assertEqual(out_len + 1, len(outputs)) - - self_attentions = outputs.attentions - - self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(self_attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - - def test_hidden_states_output(self): - def check_hidden_states_output(inputs_dict, config, model_class): - model = model_class(config) - model.to(torch_device) - model.eval() - - with torch.no_grad(): - 
outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - hidden_states = outputs.hidden_states - expected_num_layers = self.model_tester.num_hidden_layers + 1 - self.assertEqual(len(hidden_states), expected_num_layers) - - seq_length = self.model_tester.expected_seq_len - - self.assertListEqual( - list(hidden_states[0].shape[-2:]), - [seq_length, self.model_tester.hidden_size], - ) - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes[2:]: - inputs_dict["output_hidden_states"] = True - check_hidden_states_output(inputs_dict, config, model_class) - - # check that output_hidden_states also work using config - del inputs_dict["output_hidden_states"] - config.output_hidden_states = True - - check_hidden_states_output(inputs_dict, config, model_class) - - -# We will verify our results on a video of eating spaghetti -# Frame indices used: [164 168 172 176 181 185 189 193 198 202 206 210 215 219 223 227] -def prepare_video(num_frames=8): - file = hf_hub_download( - repo_id="hf-internal-testing/spaghetti-video", filename="eating_spaghetti.npy", repo_type="dataset" - ) - video = np.load(file)[:num_frames] - return list(video) - - -def prepare_audio(num_samples=1): - ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") - # automatic decoding with librispeech - speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] - return [x["array"] for x in speech_samples] - - -@require_torch -@require_vision -class TvltModelIntegrationTest(unittest.TestCase): - @cached_property - def default_processors(self): - # logits were tested with a different mean and std, so we use the same here - return ( - TvltImageProcessor() if is_vision_available() else None, - TvltFeatureExtractor(), - ) - - def test_inference_for_base_model(self): - model = TvltModel.from_pretrained("ZinengTang/tvlt-base").to(torch_device) - - image_processor, audio_feature_extractor = self.default_processors - video = prepare_video() - audio = prepare_audio() - video_inputs = image_processor(video, return_tensors="pt").to(torch_device) - audio_inputs = audio_feature_extractor(audio, return_tensors="pt").to(torch_device) - inputs = {} - inputs.update(video_inputs) - inputs.update(audio_inputs) - - # forward pass - with torch.no_grad(): - outputs = model(**inputs) - - # verify the logits - expected_last_hidden_state_slice = torch.tensor([[-0.0186, -0.0691], [0.0242, -0.0398]], device=torch_device) - self.assertTrue( - torch.allclose(outputs.last_hidden_state[:, :2, :2], expected_last_hidden_state_slice, atol=1e-4) - ) - - def test_inference_for_pretraining(self): - model = TvltForPreTraining.from_pretrained("ZinengTang/tvlt-base").to(torch_device) - - image_processor, audio_feature_extractor = self.default_processors - video = prepare_video() - video_mixed = prepare_video() - audio = prepare_audio() - video_inputs = image_processor(video, return_tensors="pt", mask_pixel=True).to(torch_device) - video_mixed_inputs = image_processor(video_mixed, is_mixed=True, return_tensors="pt").to(torch_device) - audio_inputs = audio_feature_extractor(audio, return_tensors="pt", mask_audio=True).to(torch_device) - labels = torch.tensor([[0.0]], device=torch_device) - inputs = {} - inputs.update(video_inputs) - inputs.update(video_mixed_inputs) - inputs.update(audio_inputs) - inputs.update({"labels": labels}) - - # forward pass - with torch.no_grad(): - outputs = model(**inputs) - - # verify the logits - 
expected_pixel_logits_shape = torch.Size([1, 1568, 768]) - expected_audio_logits_shape = torch.Size([1, 96, 256]) - expected_matching_logits_shape = torch.Size([1, 1]) - - if outputs.pixel_logits is not None: - self.assertEqual(outputs.pixel_logits.shape, expected_pixel_logits_shape) - if outputs.audio_logits is not None: - self.assertEqual(outputs.audio_logits.shape, expected_audio_logits_shape) - self.assertTrue(outputs.matching_logits.shape, expected_matching_logits_shape) diff --git a/tests/models/tvlt/test_processor_tvlt.py b/tests/models/tvlt/test_processor_tvlt.py deleted file mode 100644 index 83f59860fe..0000000000 --- a/tests/models/tvlt/test_processor_tvlt.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import shutil -import tempfile -import unittest - -import numpy as np -import pytest - -from transformers import is_speech_available, is_vision_available -from transformers.testing_utils import require_torch - - -if is_vision_available(): - from transformers import TvltImageProcessor - -if is_speech_available(): - from transformers import TvltFeatureExtractor - -from transformers import TvltProcessor - - -@require_torch -class TvltProcessorTest(unittest.TestCase): - def setUp(self): - self.checkpoint = "ZinengTang/tvlt-base" - self.tmpdirname = tempfile.mkdtemp() - - def get_image_processor(self, **kwargs): - return TvltImageProcessor.from_pretrained(self.checkpoint, **kwargs) - - def get_feature_extractor(self, **kwargs): - return TvltFeatureExtractor.from_pretrained(self.checkpoint, **kwargs) - - def tearDown(self): - shutil.rmtree(self.tmpdirname) - - def test_save_load_pretrained_default(self): - image_processor = self.get_image_processor() - feature_extractor = self.get_feature_extractor() - - processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor) - processor.save_pretrained(self.tmpdirname) - processor = TvltProcessor.from_pretrained(self.tmpdirname) - - self.assertIsInstance(processor.feature_extractor, TvltFeatureExtractor) - self.assertIsInstance(processor.image_processor, TvltImageProcessor) - - def test_feature_extractor(self): - image_processor = self.get_image_processor() - feature_extractor = self.get_feature_extractor() - - processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor) - - audio = np.ones([12000]) - - audio_dict = feature_extractor(audio, return_tensors="np") - input_processor = processor(audio=audio, return_tensors="np") - - for key in audio_dict.keys(): - self.assertAlmostEqual(audio_dict[key].sum(), input_processor[key].sum(), delta=1e-2) - - def test_image_processor(self): - image_processor = self.get_image_processor() - feature_extractor = self.get_feature_extractor() - - processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor) - - images = np.ones([3, 224, 224]) - - image_dict = image_processor(images, return_tensors="np") - 
input_processor = processor(images=images, return_tensors="np") - - for key in image_dict.keys(): - self.assertAlmostEqual(image_dict[key].sum(), input_processor[key].sum(), delta=1e-2) - - def test_processor(self): - image_processor = self.get_image_processor() - feature_extractor = self.get_feature_extractor() - - processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor) - - audio = np.ones([12000]) - images = np.ones([3, 224, 224]) - - inputs = processor(audio=audio, images=images) - - self.assertListEqual(list(inputs.keys()), ["audio_values", "audio_mask", "pixel_values", "pixel_mask"]) - - # test if it raises when no input is passed - with pytest.raises(ValueError): - processor() - - def test_model_input_names(self): - image_processor = self.get_image_processor() - feature_extractor = self.get_feature_extractor() - - processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor) - - self.assertListEqual( - processor.model_input_names, - image_processor.model_input_names + feature_extractor.model_input_names, - msg="`processor` and `image_processor`+`feature_extractor` model input names do not match", - ) diff --git a/tests/models/vit_hybrid/__init__.py b/tests/models/vit_hybrid/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/vit_hybrid/test_modeling_vit_hybrid.py b/tests/models/vit_hybrid/test_modeling_vit_hybrid.py deleted file mode 100644 index 043dcb4de4..0000000000 --- a/tests/models/vit_hybrid/test_modeling_vit_hybrid.py +++ /dev/null @@ -1,281 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
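As a quick illustration of the behaviour the removed TvltProcessor test above exercised (the combined processor yields the union of the image-processor and feature-extractor outputs), here is a minimal sketch; it assumes a transformers release that still ships TVLT and reuses the same dummy NumPy inputs as that test.

import numpy as np

from transformers import TvltFeatureExtractor, TvltImageProcessor, TvltProcessor

# Build the combined processor from its two sub-processors, as the removed test did.
processor = TvltProcessor(
    image_processor=TvltImageProcessor(),
    feature_extractor=TvltFeatureExtractor(),
)

audio = np.ones([12000])         # dummy mono waveform
images = np.ones([3, 224, 224])  # dummy RGB frame

inputs = processor(audio=audio, images=images)
# The removed test expected exactly these keys:
# ["audio_values", "audio_mask", "pixel_values", "pixel_mask"]
print(list(inputs.keys()))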
-"""Testing suite for the PyTorch ViT Hybrid model.""" - -import unittest - -from transformers import ViTHybridConfig -from transformers.testing_utils import is_flaky, require_accelerate, require_torch, require_vision, slow, torch_device -from transformers.utils import cached_property, is_torch_available, is_vision_available - -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_torch_available(): - import torch - from torch import nn - - from transformers import ViTHybridForImageClassification, ViTHybridImageProcessor, ViTHybridModel - - -if is_vision_available(): - from PIL import Image - - -class ViTHybridModelTester: - def __init__( - self, - parent, - batch_size=13, - image_size=64, - patch_size=2, - num_channels=3, - is_training=True, - use_labels=True, - hidden_size=32, - num_hidden_layers=2, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - type_sequence_label_size=10, - initializer_range=0.02, - backbone_featmap_shape=[1, 16, 4, 4], - scope=None, - attn_implementation="eager", - ): - self.parent = parent - self.batch_size = batch_size - self.image_size = image_size - self.patch_size = patch_size - self.num_channels = num_channels - self.is_training = is_training - self.use_labels = use_labels - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.scope = scope - self.backbone_featmap_shape = backbone_featmap_shape - self.attn_implementation = attn_implementation - - # in ViT hybrid, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token) - # the number of patches is based on the feature map of the backbone, which by default uses an output stride - # of 32, which means that the feature map has a spatial resolution of 1/32 of the input image size - num_patches = (self.image_size // 32) ** 2 - self.seq_length = num_patches + 1 - - def prepare_config_and_inputs(self): - pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) - - labels = None - if self.use_labels: - labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - - config = self.get_config() - - return config, pixel_values, labels - - def get_config(self): - backbone_config = { - "global_padding": "same", - "layer_type": "bottleneck", - "depths": [3, 4, 9], - "out_features": ["stage1", "stage2", "stage3"], - "embedding_dynamic_padding": True, - "hidden_sizes": [4, 8, 16, 32], - "num_groups": 2, - } - - return ViTHybridConfig( - image_size=self.image_size, - patch_size=self.patch_size, - num_channels=self.num_channels, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - is_decoder=False, - initializer_range=self.initializer_range, - 
backbone_featmap_shape=self.backbone_featmap_shape, - backbone_config=backbone_config, - backbone=None, - attn_implementation=self.attn_implementation, - ) - - def create_and_check_model(self, config, pixel_values, labels): - model = ViTHybridModel(config=config) - model.to(torch_device) - model.eval() - result = model(pixel_values) - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - - def create_and_check_for_image_classification(self, config, pixel_values, labels): - config.num_labels = self.type_sequence_label_size - model = ViTHybridForImageClassification(config) - model.to(torch_device) - model.eval() - result = model(pixel_values, labels=labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size)) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - config, pixel_values, labels = config_and_inputs - inputs_dict = {"pixel_values": pixel_values} - return config, inputs_dict - - -@require_torch -class ViTHybridModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): - """ - Here we also overwrite some of the tests of test_modeling_common.py, as ViT does not use input_ids, inputs_embeds, - attention_mask and seq_length. - """ - - all_model_classes = (ViTHybridModel, ViTHybridForImageClassification) if is_torch_available() else () - pipeline_model_mapping = ( - {"image-feature-extraction": ViTHybridModel, "image-classification": ViTHybridForImageClassification} - if is_torch_available() - else {} - ) - test_pruning = False - test_resize_embeddings = False - test_head_masking = False - model_split_percents = [0.5, 0.9] - - def setUp(self): - self.model_tester = ViTHybridModelTester(self) - self.config_tester = ConfigTester(self, config_class=ViTHybridConfig, has_text_modality=False, hidden_size=37) - - def test_config(self): - self.config_tester.run_common_tests() - - @unittest.skip(reason="ViT does not use inputs_embeds") - def test_inputs_embeds(self): - pass - - def test_model_common_attributes(self): - config, _ = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - model = model_class(config) - self.assertIsInstance(model.get_input_embeddings(), (nn.Module)) - x = model.get_output_embeddings() - self.assertTrue(x is None or isinstance(x, nn.Linear)) - - def test_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_model(*config_and_inputs) - - def test_for_image_classification(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_image_classification(*config_and_inputs) - - def test_initialization(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - configs_no_init = _config_zero_init(config) - for model_class in self.all_model_classes: - model = model_class(config=configs_no_init) - # Skip the check for the backbone - for name, module in model.named_modules(): - if module.__class__.__name__ == "ViTHybridPatchEmbeddings": - backbone_params = [f"{name}.{key}" for key in module.state_dict().keys()] - break - - for name, param in model.named_parameters(): - if param.requires_grad: - if name in backbone_params: - continue - self.assertIn( - ((param.data.mean() * 1e9).round() / 1e9).item(), - [0.0, 1.0], - msg=f"Parameter {name} of model {model_class} seems not properly initialized", - ) - - @slow - def 
test_model_from_pretrained(self): - model_name = "google/vit-hybrid-base-bit-384" - model = ViTHybridModel.from_pretrained(model_name) - self.assertIsNotNone(model) - - @is_flaky(description="is_flaky https://github.com/huggingface/transformers/issues/29516") - def test_batching_equivalence(self): - super().test_batching_equivalence() - - -# We will verify our results on an image of cute cats -def prepare_img(): - image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") - return image - - -@require_torch -@require_vision -class ViTModelIntegrationTest(unittest.TestCase): - @cached_property - def default_image_processor(self): - return ( - ViTHybridImageProcessor.from_pretrained("google/vit-hybrid-base-bit-384") - if is_vision_available() - else None - ) - - @slow - def test_inference_image_classification_head(self): - model = ViTHybridForImageClassification.from_pretrained("google/vit-hybrid-base-bit-384").to(torch_device) - - image_processor = self.default_image_processor - image = prepare_img() - inputs = image_processor(images=image, return_tensors="pt").to(torch_device) - - # forward pass - with torch.no_grad(): - outputs = model(**inputs) - - # verify the logits - expected_shape = torch.Size((1, 1000)) - self.assertEqual(outputs.logits.shape, expected_shape) - - expected_slice = torch.tensor([-1.9090, -0.4993, -0.2389]).to(torch_device) - - self.assertTrue(torch.allclose(outputs.logits[0, :3], expected_slice, atol=1e-4)) - - @slow - @require_accelerate - def test_accelerate_inference(self): - image_processor = ViTHybridImageProcessor.from_pretrained("google/vit-hybrid-base-bit-384") - model = ViTHybridForImageClassification.from_pretrained("google/vit-hybrid-base-bit-384", device_map="auto") - - image = prepare_img() - - inputs = image_processor(images=image, return_tensors="pt").to(torch_device) - outputs = model(**inputs) - logits = outputs.logits - # model predicts one of the 1000 ImageNet classes - predicted_class_idx = logits.argmax(-1).item() - - self.assertTrue(model.config.id2label[predicted_class_idx], "tabby, tabby cat") diff --git a/tests/models/xlm_prophetnet/__init__.py b/tests/models/xlm_prophetnet/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/xlm_prophetnet/test_modeling_xlm_prophetnet.py b/tests/models/xlm_prophetnet/test_modeling_xlm_prophetnet.py deleted file mode 100644 index 614ccd6ebc..0000000000 --- a/tests/models/xlm_prophetnet/test_modeling_xlm_prophetnet.py +++ /dev/null @@ -1,150 +0,0 @@ -# coding=utf-8 -# Copyright 2020 The HuggingFace Inc. team, The Microsoft Research team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
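For reference, the inference path covered by the removed ViT Hybrid integration test above amounts to the sketch below; it assumes a transformers release that still includes ViT Hybrid, access to the "google/vit-hybrid-base-bit-384" checkpoint, and "cat.png" standing in for any local RGB image (the test itself used a COCO fixture).

import torch
from PIL import Image

from transformers import ViTHybridForImageClassification, ViTHybridImageProcessor

checkpoint = "google/vit-hybrid-base-bit-384"
image_processor = ViTHybridImageProcessor.from_pretrained(checkpoint)
model = ViTHybridForImageClassification.from_pretrained(checkpoint)

image = Image.open("cat.png")  # placeholder path, any RGB image works
inputs = image_processor(images=image, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits  # shape (1, 1000): ImageNet-1k classes

print(model.config.id2label[logits.argmax(-1).item()])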
- - -import unittest - -from transformers import is_torch_available -from transformers.testing_utils import require_torch, slow, torch_device - - -if is_torch_available(): - import torch - - from transformers import XLMProphetNetForConditionalGeneration, XLMProphetNetTokenizer - - -@require_torch -class XLMProphetNetModelIntegrationTest(unittest.TestCase): - @slow - def test_pretrained_checkpoint_hidden_states(self): - model = XLMProphetNetForConditionalGeneration.from_pretrained("microsoft/xprophetnet-large-wiki100-cased") - model.to(torch_device) - - # encoder-decoder outputs - encoder_ids = torch.tensor([[17, 96208, 103471, 2]]).to(torch_device) - decoder_prev_ids = torch.tensor( - [[2, 250, 9953, 34, 69489, 1620, 32, 118424, 624, 210, 105, 2913, 1032, 351]] - ).to(torch_device) - output = model( - input_ids=encoder_ids, attention_mask=None, encoder_outputs=None, decoder_input_ids=decoder_prev_ids - ) - output_predited_logis = output[0] - expected_shape = torch.Size((1, 14, 250012)) - self.assertEqual(output_predited_logis.shape, expected_shape) - expected_slice = torch.tensor( - [[[-6.3986, -8.2391, 12.5189], [-6.3289, -8.0864, 12.6211], [-6.2418, -8.0445, 12.7968]]] - ).to(torch_device) - self.assertTrue(torch.allclose(output_predited_logis[:, :3, :3], expected_slice, atol=1e-4)) - - # encoder outputs - encoder_outputs = model.prophetnet.encoder(encoder_ids)[0] - expected_encoder_outputs_slice = torch.tensor( - [[[-1.4260, -0.7628, 0.8453], [-1.4719, -0.1391, 0.7807], [-1.7678, 0.0114, 0.4646]]] - ).to(torch_device) - expected_shape_encoder = torch.Size((1, 4, 1024)) - self.assertEqual(encoder_outputs.shape, expected_shape_encoder) - self.assertTrue(torch.allclose(encoder_outputs[:, :3, :3], expected_encoder_outputs_slice, atol=1e-4)) - - # decoder outputs - decoder_outputs = model.prophetnet.decoder( - decoder_prev_ids, - encoder_hidden_states=encoder_outputs, - ) - predicting_streams = decoder_outputs[1].view(1, model.config.ngram, 14, -1) - predicting_streams_logits = model.lm_head(predicting_streams) - next_first_stream_logits = predicting_streams_logits[:, 0] - self.assertTrue(torch.allclose(next_first_stream_logits[:, :3, :3], expected_slice, atol=1e-4)) - - @slow - def test_ntg_hidden_states(self): - model = XLMProphetNetForConditionalGeneration.from_pretrained( - "microsoft/xprophetnet-large-wiki100-cased-xglue-ntg" - ) - model.to(torch_device) - - encoder_ids = torch.tensor([[17, 96208, 103471, 2]]).to(torch_device) - decoder_prev_ids = torch.tensor( - [[2, 250, 9953, 34, 69489, 1620, 32, 118424, 624, 210, 105, 2913, 1032, 351]] - ).to(torch_device) - output = model( - input_ids=encoder_ids, attention_mask=None, encoder_outputs=None, decoder_input_ids=decoder_prev_ids - ) - output_predited_logis = output[0] - expected_shape = torch.Size((1, 14, 250012)) - self.assertEqual(output_predited_logis.shape, expected_shape) - # compare the actual values for a slice. 
- expected_slice = torch.tensor( - [[[-9.2253, -9.7173, -6.3529], [-7.6701, -9.0145, -1.9382], [-8.0195, -7.0004, -0.1523]]] - ).to(torch_device) - - self.assertTrue(torch.allclose(output_predited_logis[:, :3, :3], expected_slice, atol=1e-4)) - - @slow - def test_xprophetnet_ntg_inference(self): - model = XLMProphetNetForConditionalGeneration.from_pretrained( - "microsoft/xprophetnet-large-wiki100-cased-xglue-ntg" - ) - model.to(torch_device) - model.config.max_length = 512 - - tokenizer = XLMProphetNetTokenizer.from_pretrained("microsoft/xprophetnet-large-wiki100-cased-xglue-ntg") - - EN_SENTENCE = ( - "Microsoft Corporation intends to officially end free support for the Windows 7 operating system after" - " January 14, 2020, according to the official portal of the organization. From that day, users of this" - " system will not be able to receive security updates, which could make their computers vulnerable to" - " cyber attacks." - ) - RU_SENTENCE = ( - "орпорация Microsoft намерена официально прекратить бесплатную поддержку операционной системы Windows 7" - " после 14 января 2020 года, сообщается на официальном портале организации . С указанного дня пользователи" - " этой системы не смогут получать обновления безопасности, из-за чего их компьютеры могут стать уязвимыми" - " к кибератакам." - ) - ZH_SENTENCE = "根据该组织的官方门户网站,微软公司打算在2020年1月14日之后正式终止对Windows 7操作系统的免费支持。从那时起,该系统的用户将无法接收安全更新,这可能会使他们的计算机容易受到网络攻击。" - - input_ids = tokenizer( - [EN_SENTENCE, RU_SENTENCE, ZH_SENTENCE], padding=True, max_length=255, return_tensors="pt" - ).input_ids - input_ids = input_ids.to(torch_device) - - summary_ids = model.generate( - input_ids, num_beams=10, length_penalty=1.0, no_repeat_ngram_size=3, early_stopping=True - ) - generated_titles = [tokenizer.decode(g, skip_special_tokens=True) for g in summary_ids] - EXPECTED_TITLE_EN = "Microsoft to end Windows 7 free support after January 14, 2020" - EXPECTED_TITLE_RU = "Microsoft намерена прекратить бесплатную поддержку Windows 7 после 14 января 2020 года" - EXPECTED_TITLE_ZH = "微软打算终止对Windows 7操作系统的免费支持" - self.assertListEqual( - [EXPECTED_TITLE_EN, EXPECTED_TITLE_RU, EXPECTED_TITLE_ZH], - generated_titles, - ) - - summary_ids_beam1 = model.generate( - input_ids, num_beams=1, length_penalty=1.0, no_repeat_ngram_size=3, early_stopping=True - ) - generated_titles_beam1_tok = [ - tokenizer.convert_ids_to_tokens(g, skip_special_tokens=True) for g in summary_ids_beam1 - ] - EXPECTED_TITLE_EN_BEAM1_TOK = "▁Microsoft ▁to ▁end ▁free ▁support ▁for ▁Windows ▁7".split(" ") - EXPECTED_TITLE_RU_BEAM1_TOK = "▁Microsoft ▁намерен а ▁прекрати ть ▁бес плат ную ▁поддержку ▁Windows ▁7 ▁после ▁14 ▁января ▁2020 ▁года".split( - " " - ) - EXPECTED_TITLE_ZH_BEAM1_TOK = "微软 公司 打算 终止 对 Windows ▁7 操作 系统的 免费 支持".split(" ") - self.assertListEqual( - [EXPECTED_TITLE_EN_BEAM1_TOK, EXPECTED_TITLE_RU_BEAM1_TOK, EXPECTED_TITLE_ZH_BEAM1_TOK], - generated_titles_beam1_tok, - ) diff --git a/tests/models/xlm_prophetnet/test_tokenization_xlm_prophetnet.py b/tests/models/xlm_prophetnet/test_tokenization_xlm_prophetnet.py deleted file mode 100644 index cadcc60049..0000000000 --- a/tests/models/xlm_prophetnet/test_tokenization_xlm_prophetnet.py +++ /dev/null @@ -1,154 +0,0 @@ -# coding=utf-8 -# Copyright 2020 The HuggingFace Inc. team, The Microsoft Research team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -from transformers.models.xlm_prophetnet.tokenization_xlm_prophetnet import SPIECE_UNDERLINE, XLMProphetNetTokenizer -from transformers.testing_utils import get_tests_dir, require_sentencepiece, slow -from transformers.utils import cached_property - -from ...test_tokenization_common import TokenizerTesterMixin - - -SAMPLE_VOCAB = get_tests_dir("fixtures/test_sentencepiece.model") - - -@require_sentencepiece -class XLMProphetNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase): - from_pretrained_id = "microsoft/xprophetnet-large-wiki100-cased" - tokenizer_class = XLMProphetNetTokenizer - test_rust_tokenizer = False - test_sentencepiece = True - - def setUp(self): - super().setUp() - - # We have a SentencePiece fixture for testing - tokenizer = XLMProphetNetTokenizer(SAMPLE_VOCAB, keep_accents=True) - tokenizer.save_pretrained(self.tmpdirname) - - def test_convert_token_and_id(self): - """Test ``_convert_token_to_id`` and ``_convert_id_to_token``.""" - token = "[PAD]" - token_id = 0 - - self.assertEqual(self.get_tokenizer()._convert_token_to_id(token), token_id) - self.assertEqual(self.get_tokenizer()._convert_id_to_token(token_id), token) - - def test_get_vocab(self): - vocab_keys = list(self.get_tokenizer().get_vocab().keys()) - - self.assertEqual(vocab_keys[0], "[PAD]") - self.assertEqual(vocab_keys[1], "[CLS]") - self.assertEqual(vocab_keys[-1], "j") - self.assertEqual(len(vocab_keys), 1_012) - - def test_vocab_size(self): - self.assertEqual(self.get_tokenizer().vocab_size, 1_012) - - def test_full_tokenizer(self): - tokenizer = XLMProphetNetTokenizer(SAMPLE_VOCAB, keep_accents=True) - - tokens = tokenizer.tokenize("This is a test") - self.assertListEqual(tokens, ["▁This", "▁is", "▁a", "▁t", "est"]) - - self.assertListEqual( - tokenizer.convert_tokens_to_ids(tokens), - [value + tokenizer.fairseq_offset for value in [285, 46, 10, 170, 382]], - ) - - tokens = tokenizer.tokenize("I was born in 92000, and this is falsé.") - self.assertListEqual( - tokens, - [ - SPIECE_UNDERLINE + "I", - SPIECE_UNDERLINE + "was", - SPIECE_UNDERLINE + "b", - "or", - "n", - SPIECE_UNDERLINE + "in", - SPIECE_UNDERLINE + "", - "9", - "2", - "0", - "0", - "0", - ",", - SPIECE_UNDERLINE + "and", - SPIECE_UNDERLINE + "this", - SPIECE_UNDERLINE + "is", - SPIECE_UNDERLINE + "f", - "al", - "s", - "é", - ".", - ], - ) - ids = tokenizer.convert_tokens_to_ids(tokens) - self.assertListEqual( - ids, - [ - value + tokenizer.fairseq_offset - for value in [8, 21, 84, 55, 24, 19, 7, -9, 602, 347, 347, 347, 3, 12, 66, 46, 72, 80, 6, -9, 4] - ], - ) - - back_tokens = tokenizer.convert_ids_to_tokens(ids) - self.assertListEqual( - back_tokens, - [ - SPIECE_UNDERLINE + "I", - SPIECE_UNDERLINE + "was", - SPIECE_UNDERLINE + "b", - "or", - "n", - SPIECE_UNDERLINE + "in", - SPIECE_UNDERLINE + "", - "[UNK]", - "2", - "0", - "0", - "0", - ",", - SPIECE_UNDERLINE + "and", - SPIECE_UNDERLINE + "this", - SPIECE_UNDERLINE + "is", - SPIECE_UNDERLINE + "f", - "al", - "s", - "[UNK]", - ".", - ], - ) - - @cached_property - def big_tokenizer(self): - return 
XLMProphetNetTokenizer.from_pretrained("microsoft/xprophetnet-large-wiki100-cased") - - @slow - def test_tokenization_base_easy_symbols(self): - symbols = "Hello World!" - original_tokenizer_encodings = [35389, 6672, 49, 2] - self.assertListEqual(original_tokenizer_encodings, self.big_tokenizer.encode(symbols)) - - @slow - def test_tokenizer_integration(self): - expected_encoding = {'input_ids': [[11073, 82783, 18, 26, 82783, 549, 51540, 248, 17209, 1301, 217, 20, 215186, 1325, 147, 17209, 1301, 217, 20, 56370, 53, 122020, 20, 16477, 27, 87355, 4548, 20, 4728, 78392, 17, 159969, 18, 26, 24491, 629, 15, 538, 22704, 5439, 15, 2788, 24491, 9885, 15, 43534, 605, 15, 814, 18403, 33200, 29, 15, 43534, 24458, 12410, 111, 24966, 83669, 9637, 144068, 26, 850, 22346, 27, 147, 24966, 83669, 83490, 26, 39113, 735, 27, 689, 656, 2800, 1339, 4600, 53, 122020, 115785, 34, 816, 1339, 46887, 18, 147, 53905, 1951, 42238, 41170, 17732, 834, 436, 15, 27523, 98733, 217, 147, 5542, 4981, 930, 17347, 16, 2], [20091, 629, 94, 82786, 58, 490, 20, 1528, 84, 53905, 344, 80592, 110128, 18822, 5267, 1306, 62, 152537, 308, 7997, 401, 124427, 549, 35442, 225, 109, 15055, 25748, 147, 7119, 43712, 34, 767, 135366, 18, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [592, 63784, 119466, 17, 147808, 88214, 18, 656, 81, 32, 3296, 10280, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]} # fmt: skip - - self.tokenizer_integration_test_util( - expected_encoding=expected_encoding, - model_name="microsoft/xprophetnet-large-wiki100-cased", - revision="1acad1643ddd54a44df6a1b797ada8373685d90e", - ) diff --git a/utils/check_config_attributes.py b/utils/check_config_attributes.py index f631c59b75..e6edcf517a 100644 --- a/utils/check_config_attributes.py +++ b/utils/check_config_attributes.py @@ -61,8 +61,6 @@ SPECIAL_CASES_TO_ALLOW = { # `ignore_value` used during training (despite we don't have training script for these models yet) # `norm` used in conversion script (despite not using in the modeling file) "OneFormerConfig": ["ignore_value", "norm"], - # used during preprocessing and collation, see `collating_graphormer.py` - "GraphormerConfig": ["spatial_pos_max"], # used internally in the 
configuration class file "T5Config": ["feed_forward_proj"], # used internally in the configuration class file @@ -134,20 +132,16 @@ SPECIAL_CASES_TO_ALLOW.update( { "CLIPSegConfig": True, "DeformableDetrConfig": True, - "DetaConfig": True, "DinatConfig": True, "DonutSwinConfig": True, - "EfficientFormerConfig": True, "FastSpeech2ConformerConfig": True, "FSMTConfig": True, - "JukeboxConfig": True, "LayoutLMv2Config": True, "MaskFormerSwinConfig": True, "MT5Config": True, # For backward compatibility with trust remote code models "MptConfig": True, "MptAttentionConfig": True, - "NatConfig": True, "OneFormerConfig": True, "PerceiverConfig": True, "RagConfig": True, diff --git a/utils/deprecate_models.py b/utils/deprecate_models.py index 2307f99720..23308e91a7 100644 --- a/utils/deprecate_models.py +++ b/utils/deprecate_models.py @@ -45,14 +45,14 @@ def get_last_stable_minor_release(): def build_tip_message(last_stable_release): return ( """ - + - This model is in maintenance mode only, we don't accept any new PRs changing its code. - """ +This model is in maintenance mode only, we don't accept any new PRs changing its code. +""" + f"""If you run into any issues running this model, please reinstall the last version that supported this model: v{last_stable_release}. - You can do so by running the following command: `pip install -U transformers=={last_stable_release}`. +You can do so by running the following command: `pip install -U transformers=={last_stable_release}`. - """ +""" ) @@ -164,7 +164,8 @@ def update_main_init_file(models): # 1. For each model, find all the instances of model.model_name and replace with model.deprecated.model_name for model in models: - init_file = init_file.replace(f"models.{model}", f"models.deprecated.{model}") + init_file = init_file.replace(f'models.{model}"', f'models.deprecated.{model}"') + init_file = init_file.replace(f"models.{model} import", f"models.deprecated.{model} import") with open(filename, "w") as f: f.write(init_file) @@ -265,14 +266,14 @@ def add_models_to_deprecated_models_in_config_auto(models): elif in_deprecated_models and line.strip() == "]": in_deprecated_models = False # Add the new models to deprecated models list - deprecated_models_list.extend([f'"{model},"' for model in models]) + deprecated_models_list.extend([f' "{model}", ' for model in models]) # Sort so they're in alphabetical order in the file deprecated_models_list = sorted(deprecated_models_list) new_file_lines.extend(deprecated_models_list) # Make sure we still have the closing bracket new_file_lines.append(line) elif in_deprecated_models: - deprecated_models_list.append(line.strip()) + deprecated_models_list.append(line) else: new_file_lines.append(line) diff --git a/utils/not_doctested.txt b/utils/not_doctested.txt index 3ffc274060..0f9334a856 100644 --- a/utils/not_doctested.txt +++ b/utils/not_doctested.txt @@ -520,8 +520,6 @@ src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl_utilities.py src/transformers/models/deprecated/van/configuration_van.py src/transformers/models/deprecated/van/convert_van_to_pytorch.py src/transformers/models/deprecated/van/modeling_van.py -src/transformers/models/deta/convert_deta_resnet_to_pytorch.py -src/transformers/models/deta/convert_deta_swin_to_pytorch.py src/transformers/models/detr/convert_detr_original_pytorch_checkpoint_to_pytorch.py src/transformers/models/detr/convert_detr_to_pytorch.py src/transformers/models/dialogpt/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py @@ -541,9 +539,6 @@ 
src/transformers/models/dpr/modeling_tf_dpr.py src/transformers/models/dpt/configuration_dpt.py src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py src/transformers/models/dpt/convert_dpt_to_pytorch.py -src/transformers/models/efficientformer/configuration_efficientformer.py -src/transformers/models/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py -src/transformers/models/efficientformer/modeling_efficientformer.py src/transformers/models/efficientnet/configuration_efficientnet.py src/transformers/models/efficientnet/convert_efficientnet_to_pytorch.py src/transformers/models/efficientnet/modeling_efficientnet.py @@ -611,12 +606,6 @@ src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py src/transformers/models/gptj/configuration_gptj.py src/transformers/models/gptj/modeling_flax_gptj.py src/transformers/models/gptj/modeling_tf_gptj.py -src/transformers/models/gptsan_japanese/configuration_gptsan_japanese.py -src/transformers/models/gptsan_japanese/convert_gptsan_tf_checkpoint_to_pytorch.py -src/transformers/models/gptsan_japanese/modeling_gptsan_japanese.py -src/transformers/models/graphormer/collating_graphormer.py -src/transformers/models/graphormer/configuration_graphormer.py -src/transformers/models/graphormer/modeling_graphormer.py src/transformers/models/groupvit/configuration_groupvit.py src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py src/transformers/models/hubert/configuration_hubert.py @@ -642,9 +631,6 @@ src/transformers/models/instructblip/modeling_instructblip.py src/transformers/models/instructblip/processing_instructblip.py src/transformers/models/jamba/configuration_jamba.py src/transformers/models/jamba/modeling_jamba.py -src/transformers/models/jukebox/configuration_jukebox.py -src/transformers/models/jukebox/convert_jukebox.py -src/transformers/models/jukebox/modeling_jukebox.py src/transformers/models/kosmos2/convert_kosmos2_original_pytorch_checkpoint_to_pytorch.py src/transformers/models/led/configuration_led.py src/transformers/models/led/modeling_led.py @@ -688,9 +674,6 @@ src/transformers/models/maskformer/convert_maskformer_swin_to_pytorch.py src/transformers/models/maskformer/modeling_maskformer_swin.py src/transformers/models/mbart/convert_mbart_original_checkpoint_to_pytorch.py src/transformers/models/mbart/modeling_flax_mbart.py -src/transformers/models/mega/configuration_mega.py -src/transformers/models/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py -src/transformers/models/mega/modeling_mega.py src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py src/transformers/models/megatron_bert/modeling_megatron_bert.py src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py @@ -725,7 +708,6 @@ src/transformers/models/mt5/modeling_tf_mt5.py src/transformers/models/musicgen/convert_musicgen_transformers.py src/transformers/models/musicgen_melody/convert_musicgen_melody_transformers.py src/transformers/models/mvp/modeling_mvp.py -src/transformers/models/nezha/modeling_nezha.py src/transformers/models/nllb_moe/configuration_nllb_moe.py src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py src/transformers/models/nllb_moe/modeling_nllb_moe.py @@ -766,8 +748,6 @@ src/transformers/models/pvt/configuration_pvt.py src/transformers/models/pvt/convert_pvt_to_pytorch.py src/transformers/models/pvt/image_processing_pvt.py src/transformers/models/pvt/modeling_pvt.py 
-src/transformers/models/qdqbert/configuration_qdqbert.py -src/transformers/models/qdqbert/modeling_qdqbert.py src/transformers/models/qwen2/configuration_qwen2.py src/transformers/models/qwen2/modeling_qwen2.py src/transformers/models/qwen2/tokenization_qwen2.py @@ -778,8 +758,6 @@ src/transformers/models/rag/configuration_rag.py src/transformers/models/rag/modeling_rag.py src/transformers/models/rag/modeling_tf_rag.py src/transformers/models/rag/retrieval_rag.py -src/transformers/models/realm/modeling_realm.py -src/transformers/models/realm/retrieval_realm.py src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py src/transformers/models/regnet/configuration_regnet.py @@ -863,8 +841,6 @@ src/transformers/models/timesformer/convert_timesformer_to_pytorch.py src/transformers/models/timm_backbone/configuration_timm_backbone.py src/transformers/models/timm_backbone/modeling_timm_backbone.py src/transformers/models/trocr/convert_trocr_unilm_to_pytorch.py -src/transformers/models/tvlt/configuration_tvlt.py -src/transformers/models/tvlt/modeling_tvlt.py src/transformers/models/umt5/configuration_umt5.py src/transformers/models/umt5/convert_umt5_checkpoint_to_pytorch.py src/transformers/models/umt5/modeling_umt5.py @@ -890,9 +866,6 @@ src/transformers/models/visual_bert/modeling_visual_bert.py src/transformers/models/vit/convert_dino_to_pytorch.py src/transformers/models/vit/convert_vit_timm_to_pytorch.py src/transformers/models/vit/modeling_flax_vit.py -src/transformers/models/vit_hybrid/configuration_vit_hybrid.py -src/transformers/models/vit_hybrid/convert_vit_hybrid_timm_to_pytorch.py -src/transformers/models/vit_hybrid/modeling_vit_hybrid.py src/transformers/models/vit_mae/convert_vit_mae_to_pytorch.py src/transformers/models/vit_mae/modeling_tf_vit_mae.py src/transformers/models/vit_msn/configuration_vit_msn.py @@ -922,8 +895,6 @@ src/transformers/models/xglm/modeling_xglm.py src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py src/transformers/models/xlm/modeling_tf_xlm.py src/transformers/models/xlm/modeling_xlm.py -src/transformers/models/xlm_prophetnet/configuration_xlm_prophetnet.py -src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py src/transformers/models/xlm_roberta/modeling_flax_xlm_roberta.py src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py src/transformers/models/xlm_roberta/modeling_xlm_roberta.py From d521ba579741330d5ccb55e5dd1e35ab4017dc36 Mon Sep 17 00:00:00 2001 From: Raushan Turganbay Date: Wed, 29 May 2024 14:25:44 +0500 Subject: [PATCH 23/36] Quantized KV cache: update quanto (#31052) * quanto latest version was refactored * add error msg * incorrect compare sign * Update src/transformers/cache_utils.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --- src/transformers/cache_utils.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/transformers/cache_utils.py b/src/transformers/cache_utils.py index ad91edfcbb..d308049247 100644 --- a/src/transformers/cache_utils.py +++ b/src/transformers/cache_utils.py @@ -1,17 +1,21 @@ import copy +import importlib.metadata import json import os from dataclasses import dataclass from typing import Any, Dict, List, Optional, Tuple, Union import torch +from packaging import version from .configuration_utils import PretrainedConfig from 
.utils import is_hqq_available, is_quanto_available, logging if is_quanto_available(): - from quanto import QBitsTensor, qint2, qint4 + quanto_version = version.parse(importlib.metadata.version("quanto")) + if quanto_version >= version.parse("0.2.0"): + from quanto import AffineQuantizer, MaxOptimizer, qint2, qint4 if is_hqq_available(): from hqq.core.quantize import Quantizer as HQQQuantizer @@ -488,6 +492,13 @@ class QuantoQuantizedCache(QuantizedCache): def __init__(self, cache_config: CacheConfig) -> None: super().__init__(cache_config) + quanto_version = version.parse(importlib.metadata.version("quanto")) + if quanto_version < version.parse("0.2.0"): + raise ImportError( + f"You need quanto package version to be greater or equal than 0.2.0 to use `QuantoQuantizedCache`. Detected version {quanto_version}. " + f"Please upgrade quanto with `pip install -U quanto`" + ) + if self.nbits not in [2, 4]: raise ValueError(f"`nbits` for `quanto` backend has to be one of [`2`, `4`] but got {self.nbits}") @@ -500,9 +511,11 @@ class QuantoQuantizedCache(QuantizedCache): ) self.qtype = qint4 if self.nbits == 4 else qint2 + self.optimizer = MaxOptimizer() # hardcode as it's the only one for per-channel quantization def _quantize(self, tensor, axis): - qtensor = QBitsTensor.quantize(tensor, axis=axis, qtype=self.qtype, group_size=self.q_group_size) + scale, zeropoint = self.optimizer(tensor, self.qtype.bits, axis, self.q_group_size) + qtensor = AffineQuantizer.apply(tensor, self.qtype, axis, self.q_group_size, scale, zeropoint) return qtensor def _dequantize(self, qtensor): From bfe6f513b9b5d782c54b42e50891a2c7aac414a3 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Wed, 29 May 2024 11:43:54 +0200 Subject: [PATCH 24/36] FEAT: Add mistral v3 conversion script (#30981) * add mistral v3 conversion script * Update src/transformers/models/mistral/convert_mistral_weights_to_hf.py Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> * fixup --------- Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> --- .../mistral/convert_mistral_weights_to_hf.py | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/transformers/models/mistral/convert_mistral_weights_to_hf.py b/src/transformers/models/mistral/convert_mistral_weights_to_hf.py index 4ba6236ee8..266812b397 100644 --- a/src/transformers/models/mistral/convert_mistral_weights_to_hf.py +++ b/src/transformers/models/mistral/convert_mistral_weights_to_hf.py @@ -19,6 +19,7 @@ import shutil import warnings import torch +from safetensors.torch import load_file as safe_load_file from transformers import ( LlamaTokenizer, @@ -76,7 +77,7 @@ def write_json(text, path): json.dump(text, f) -def write_model(model_path, input_base_path, model_size, tokenizer_path=None, safe_serialization=True): +def write_model(model_path, input_base_path, model_size, tokenizer_path=None, safe_serialization=True, is_v3=False): # for backward compatibility, before you needed the repo to be called `my_repo/model_size` if not os.path.isfile(os.path.join(input_base_path, "params.json")): input_base_path = os.path.join(input_base_path, model_size) @@ -88,8 +89,12 @@ def write_model(model_path, input_base_path, model_size, tokenizer_path=None, sa params = read_json(os.path.join(input_base_path, "params.json")) num_shards = NUM_SHARDS[model_size] + sliding_window = params.get("sliding_window", None) + # For some reason this is a string in the params.json - 
sliding_window = int(params["sliding_window"]) + if sliding_window is not None: + sliding_window = int(sliding_window) + n_layers = params["n_layers"] n_heads = params["n_heads"] n_heads_per_shard = n_heads // num_shards @@ -100,7 +105,7 @@ def write_model(model_path, input_base_path, model_size, tokenizer_path=None, sa max_position_embeddings = 4096 * 8 if tokenizer_path is not None: - tokenizer = tokenizer_class(tokenizer_path) + tokenizer = tokenizer_class(tokenizer_path + ".v3" if is_v3 else "") tokenizer.save_pretrained(model_path) vocab_size = tokenizer.vocab_size if tokenizer_path is not None else 32000 @@ -118,11 +123,15 @@ def write_model(model_path, input_base_path, model_size, tokenizer_path=None, sa return w.view(n_heads, dim1 // n_heads // 2, 2, dim2).transpose(1, 2).reshape(dim1, dim2) print(f"Fetching all parameters from the checkpoint at {input_base_path}.") - # Load weights - loaded = [ - torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pth"), map_location="cpu") - for i in range(num_shards) - ] + + # Load weights - for v3 models the consolidated weights are in a single file format in safetensors + if is_v3: + loaded = [safe_load_file(os.path.join(input_base_path, "consolidated.safetensors"))] + else: + loaded = [ + torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pth"), map_location="cpu") + for i in range(num_shards) + ] param_count = 0 index_dict = {"weight_map": {}} for layer_i in range(n_layers): @@ -231,6 +240,7 @@ def write_model(model_path, input_base_path, model_size, tokenizer_path=None, sa del model.config._name_or_path model.config.torch_dtype = torch.float16 print("Saving in the Transformers format.") + model.save_pretrained(model_path, safe_serialization=safe_serialization) shutil.rmtree(tmp_model_path) @@ -258,6 +268,9 @@ def main(): help="Location to write HF model and tokenizer", ) parser.add_argument("--safe_serialization", type=bool, help="Whether or not to save using `safetensors`.") + parser.add_argument( + "--is_v3", action="store_true", help="Whether the checkpoints correspond to the 3rd version or not." 
+ ) args = parser.parse_args() spm_path = os.path.join(args.input_dir, "tokenizer.model") if args.model_size != "tokenizer_only": @@ -267,6 +280,7 @@ def main(): model_size=args.model_size, safe_serialization=args.safe_serialization, tokenizer_path=spm_path, + is_v3=args.is_v3, ) else: write_tokenizer(args.output_dir, spm_path) From c3044ec2f3416bdec19ea66504b7911549bd3b16 Mon Sep 17 00:00:00 2001 From: Lucain Date: Wed, 29 May 2024 12:55:43 +0200 Subject: [PATCH 25/36] Use `HF_HUB_OFFLINE` + fix has_file in offline mode (#31016) * Fix has_file in offline mode * harmonize env variable for offline mode * Switch to HF_HUB_OFFLINE * fix test * revert test_offline to test TRANSFORMERS_OFFLINE * Add new offline test * merge conflicts * docs --- docs/source/de/installation.md | 4 +- docs/source/en/installation.md | 4 +- docs/source/es/installation.md | 4 +- docs/source/fr/installation.md | 4 +- docs/source/it/installation.md | 4 +- docs/source/ja/installation.md | 4 +- docs/source/ko/installation.md | 4 +- docs/source/pt/installation.md | 4 +- docs/source/zh/installation.md | 4 +- src/transformers/modeling_flax_utils.py | 2 + src/transformers/modeling_tf_utils.py | 2 + src/transformers/modeling_utils.py | 5 ++ src/transformers/utils/hub.py | 56 ++++++++++--- tests/test_configuration_utils.py | 1 - tests/utils/test_hub_utils.py | 19 ++++- tests/utils/test_offline.py | 103 +++++++++++++----------- 16 files changed, 148 insertions(+), 76 deletions(-) diff --git a/docs/source/de/installation.md b/docs/source/de/installation.md index 55d0f2d851..1bd34f7330 100644 --- a/docs/source/de/installation.md +++ b/docs/source/de/installation.md @@ -162,7 +162,7 @@ Transformers verwendet die Shell-Umgebungsvariablen `PYTORCH_TRANSFORMERS_CACHE` ## Offline Modus -Transformers ist in der Lage, in einer Firewall- oder Offline-Umgebung zu laufen, indem es nur lokale Dateien verwendet. Setzen Sie die Umgebungsvariable `TRANSFORMERS_OFFLINE=1`, um dieses Verhalten zu aktivieren. +Transformers ist in der Lage, in einer Firewall- oder Offline-Umgebung zu laufen, indem es nur lokale Dateien verwendet. Setzen Sie die Umgebungsvariable `HF_HUB_OFFLINE=1`, um dieses Verhalten zu aktivieren. @@ -179,7 +179,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog Führen Sie das gleiche Programm in einer Offline-Instanz mit aus: ```bash -HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \ +HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \ python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ... ``` diff --git a/docs/source/en/installation.md b/docs/source/en/installation.md index 7ece8eae44..3ed4edf3d8 100644 --- a/docs/source/en/installation.md +++ b/docs/source/en/installation.md @@ -169,7 +169,7 @@ Pretrained models are downloaded and locally cached at: `~/.cache/huggingface/hu ## Offline mode -Run 🤗 Transformers in a firewalled or offline environment with locally cached files by setting the environment variable `TRANSFORMERS_OFFLINE=1`. +Run 🤗 Transformers in a firewalled or offline environment with locally cached files by setting the environment variable `HF_HUB_OFFLINE=1`. @@ -178,7 +178,7 @@ Add [🤗 Datasets](https://huggingface.co/docs/datasets/) to your offline train ```bash -HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \ +HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \ python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ... 
``` diff --git a/docs/source/es/installation.md b/docs/source/es/installation.md index b79d0af4a4..714c3b195e 100644 --- a/docs/source/es/installation.md +++ b/docs/source/es/installation.md @@ -154,7 +154,7 @@ Los modelos preentrenados se descargan y almacenan en caché localmente en: `~/. ## Modo Offline -🤗 Transformers puede ejecutarse en un entorno con firewall o fuera de línea (offline) usando solo archivos locales. Configura la variable de entorno `TRANSFORMERS_OFFLINE=1` para habilitar este comportamiento. +🤗 Transformers puede ejecutarse en un entorno con firewall o fuera de línea (offline) usando solo archivos locales. Configura la variable de entorno `HF_HUB_OFFLINE=1` para habilitar este comportamiento. @@ -171,7 +171,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog Ejecuta este mismo programa en una instancia offline con el siguiente comando: ```bash -HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \ +HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \ python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ... ``` diff --git a/docs/source/fr/installation.md b/docs/source/fr/installation.md index cd68911bc3..bbc93d810f 100644 --- a/docs/source/fr/installation.md +++ b/docs/source/fr/installation.md @@ -171,7 +171,7 @@ Les modèles pré-entraînés sont téléchargés et mis en cache localement dan ## Mode hors ligne -🤗 Transformers peut fonctionner dans un environnement cloisonné ou hors ligne en n'utilisant que des fichiers locaux. Définissez la variable d'environnement `TRANSFORMERS_OFFLINE=1` pour activer ce mode. +🤗 Transformers peut fonctionner dans un environnement cloisonné ou hors ligne en n'utilisant que des fichiers locaux. Définissez la variable d'environnement `HF_HUB_OFFLINE=1` pour activer ce mode. @@ -180,7 +180,7 @@ Ajoutez [🤗 Datasets](https://huggingface.co/docs/datasets/) à votre processu ```bash -HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \ +HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \ python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ... ``` diff --git a/docs/source/it/installation.md b/docs/source/it/installation.md index 2f45f4182d..a4f444c1eb 100644 --- a/docs/source/it/installation.md +++ b/docs/source/it/installation.md @@ -152,7 +152,7 @@ I modelli pre-allenati sono scaricati e memorizzati localmente nella cache in: ` ## Modalità Offline -🤗 Transformers può essere eseguita in un ambiente firewalled o offline utilizzando solo file locali. Imposta la variabile d'ambiente `TRANSFORMERS_OFFLINE=1` per abilitare questo comportamento. +🤗 Transformers può essere eseguita in un ambiente firewalled o offline utilizzando solo file locali. Imposta la variabile d'ambiente `HF_HUB_OFFLINE=1` per abilitare questo comportamento. @@ -169,7 +169,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog Esegui lo stesso programma in un'istanza offline con: ```bash -HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \ +HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \ python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ... 
``` diff --git a/docs/source/ja/installation.md b/docs/source/ja/installation.md index 915984a91c..a0b9dfe3bd 100644 --- a/docs/source/ja/installation.md +++ b/docs/source/ja/installation.md @@ -157,7 +157,7 @@ conda install conda-forge::transformers ## オフラインモード -🤗 Transformersはローカルファイルのみを使用することでファイアウォールやオフラインの環境でも動作させることができます。この動作を有効にするためには、環境変数`TRANSFORMERS_OFFLINE=1`を設定します。 +🤗 Transformersはローカルファイルのみを使用することでファイアウォールやオフラインの環境でも動作させることができます。この動作を有効にするためには、環境変数`HF_HUB_OFFLINE=1`を設定します。 @@ -174,7 +174,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog オフラインインスタンスでこの同じプログラムを実行します: ```bash -HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \ +HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \ python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ... ``` diff --git a/docs/source/ko/installation.md b/docs/source/ko/installation.md index 062184e5b3..1583e994d6 100644 --- a/docs/source/ko/installation.md +++ b/docs/source/ko/installation.md @@ -157,7 +157,7 @@ conda install conda-forge::transformers ## 오프라인 모드[[offline-mode]] -🤗 Transformers를 로컬 파일만 사용하도록 해서 방화벽 또는 오프라인 환경에서 실행할 수 있습니다. 활성화하려면 `TRANSFORMERS_OFFLINE=1` 환경 변수를 설정하세요. +🤗 Transformers를 로컬 파일만 사용하도록 해서 방화벽 또는 오프라인 환경에서 실행할 수 있습니다. 활성화하려면 `HF_HUB_OFFLINE=1` 환경 변수를 설정하세요. @@ -174,7 +174,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog 오프라인 기기에서 동일한 프로그램을 다음과 같이 실행할 수 있습니다. ```bash -HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \ +HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \ python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ... ``` diff --git a/docs/source/pt/installation.md b/docs/source/pt/installation.md index 7eeefd883d..f548736589 100644 --- a/docs/source/pt/installation.md +++ b/docs/source/pt/installation.md @@ -173,7 +173,7 @@ No Windows, este diretório pré-definido é dado por `C:\Users\username\.cache\ ## Modo Offline O 🤗 Transformers também pode ser executado num ambiente de firewall ou fora da rede (offline) usando arquivos locais. -Para tal, configure a variável de ambiente de modo que `TRANSFORMERS_OFFLINE=1`. +Para tal, configure a variável de ambiente de modo que `HF_HUB_OFFLINE=1`. @@ -191,7 +191,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog Execute esse mesmo programa numa instância offline com o seguinte comando: ```bash -HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \ +HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \ python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ... ``` diff --git a/docs/source/zh/installation.md b/docs/source/zh/installation.md index 91e09dc904..f87eaa5fc1 100644 --- a/docs/source/zh/installation.md +++ b/docs/source/zh/installation.md @@ -169,7 +169,7 @@ conda install conda-forge::transformers ## 离线模式 -🤗 Transformers 可以仅使用本地文件在防火墙或离线环境中运行。设置环境变量 `TRANSFORMERS_OFFLINE=1` 以启用该行为。 +🤗 Transformers 可以仅使用本地文件在防火墙或离线环境中运行。设置环境变量 `HF_HUB_OFFLINE=1` 以启用该行为。 @@ -186,7 +186,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog 在离线环境中运行相同的程序: ```bash -HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \ +HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \ python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ... 
``` diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py index f669329ac0..61077cf7c3 100644 --- a/src/transformers/modeling_flax_utils.py +++ b/src/transformers/modeling_flax_utils.py @@ -823,6 +823,8 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin): "revision": revision, "proxies": proxies, "token": token, + "cache_dir": cache_dir, + "local_files_only": local_files_only, } if has_file(pretrained_model_name_or_path, SAFE_WEIGHTS_INDEX_NAME, **has_file_kwargs): is_sharded = True diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index f6b9b00117..0ad5dd0396 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -2864,6 +2864,8 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT "revision": revision, "proxies": proxies, "token": token, + "cache_dir": cache_dir, + "local_files_only": local_files_only, } if has_file(pretrained_model_name_or_path, SAFE_WEIGHTS_INDEX_NAME, **has_file_kwargs): is_sharded = True diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 27f26e42a8..a613fee62c 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -3405,6 +3405,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix "revision": revision, "proxies": proxies, "token": token, + "cache_dir": cache_dir, + "local_files_only": local_files_only, } cached_file_kwargs = { "cache_dir": cache_dir, @@ -3432,6 +3434,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix "revision": revision, "proxies": proxies, "token": token, + "cache_dir": cache_dir, + "local_files_only": local_files_only, } if has_file(pretrained_model_name_or_path, TF2_WEIGHTS_NAME, **has_file_kwargs): raise EnvironmentError( @@ -3459,6 +3463,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix f" {_add_variant(WEIGHTS_NAME, variant)}, {_add_variant(SAFE_WEIGHTS_NAME, variant)}," f" {TF2_WEIGHTS_NAME}, {TF_WEIGHTS_NAME} or {FLAX_WEIGHTS_NAME}." ) + except EnvironmentError: # Raise any environment error raise by `cached_file`. It will have a helpful error message adapted # to the original exception. diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py index 173fcb352d..efe40f0e21 100644 --- a/src/transformers/utils/hub.py +++ b/src/transformers/utils/hub.py @@ -51,9 +51,11 @@ from huggingface_hub.utils import ( GatedRepoError, HFValidationError, LocalEntryNotFoundError, + OfflineModeIsEnabled, RepositoryNotFoundError, RevisionNotFoundError, build_hf_headers, + get_session, hf_raise_for_status, send_telemetry, ) @@ -75,7 +77,7 @@ from .logging import tqdm logger = logging.get_logger(__name__) # pylint: disable=invalid-name -_is_offline_mode = True if os.environ.get("TRANSFORMERS_OFFLINE", "0").upper() in ENV_VARS_TRUE_VALUES else False +_is_offline_mode = huggingface_hub.constants.HF_HUB_OFFLINE def is_offline_mode(): @@ -599,11 +601,17 @@ def has_file( revision: Optional[str] = None, proxies: Optional[Dict[str, str]] = None, token: Optional[Union[bool, str]] = None, + *, + local_files_only: bool = False, + cache_dir: Union[str, Path, None] = None, + repo_type: Optional[str] = None, **deprecated_kwargs, ): """ Checks if a repo contains a given file without downloading it. Works for remote repos and local folders. + If offline mode is enabled, checks if the file exists in the cache. 
+ This function will raise an error if the repository `path_or_repo` is not valid or if `revision` does not exist for @@ -621,15 +629,41 @@ def has_file( raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") token = use_auth_token + # If path to local directory, check if the file exists if os.path.isdir(path_or_repo): return os.path.isfile(os.path.join(path_or_repo, filename)) - url = hf_hub_url(path_or_repo, filename=filename, revision=revision) - headers = build_hf_headers(token=token, user_agent=http_user_agent()) + # Else it's a repo => let's check if the file exists in local cache or on the Hub - r = requests.head(url, headers=headers, allow_redirects=False, proxies=proxies, timeout=10) + # Check if file exists in cache + # This information might be outdated so it's best to also make a HEAD call (if allowed). + cached_path = try_to_load_from_cache( + repo_id=path_or_repo, + filename=filename, + revision=revision, + repo_type=repo_type, + cache_dir=cache_dir, + ) + has_file_in_cache = isinstance(cached_path, str) + + # If local_files_only, don't try the HEAD call + if local_files_only: + return has_file_in_cache + + # Check if the file exists try: - hf_raise_for_status(r) + response = get_session().head( + hf_hub_url(path_or_repo, filename=filename, revision=revision, repo_type=repo_type), + headers=build_hf_headers(token=token, user_agent=http_user_agent()), + allow_redirects=False, + proxies=proxies, + timeout=10, + ) + except OfflineModeIsEnabled: + return has_file_in_cache + + try: + hf_raise_for_status(response) return True except GatedRepoError as e: logger.error(e) @@ -640,16 +674,20 @@ def has_file( ) from e except RepositoryNotFoundError as e: logger.error(e) - raise EnvironmentError(f"{path_or_repo} is not a local folder or a valid repository name on 'https://hf.co'.") + raise EnvironmentError( + f"{path_or_repo} is not a local folder or a valid repository name on 'https://hf.co'." + ) from e except RevisionNotFoundError as e: logger.error(e) raise EnvironmentError( f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for this " f"model name. Check the model page at 'https://huggingface.co/{path_or_repo}' for available revisions." - ) + ) from e + except EntryNotFoundError: + return False # File does not exist except requests.HTTPError: - # We return false for EntryNotFoundError (logical) as well as any connection error. - return False + # Any authentication/authorization error will be caught here => default to cache + return has_file_in_cache class PushToHubMixin: diff --git a/tests/test_configuration_utils.py b/tests/test_configuration_utils.py index a5322a176e..b9f090e061 100644 --- a/tests/test_configuration_utils.py +++ b/tests/test_configuration_utils.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- import json import os import shutil diff --git a/tests/utils/test_hub_utils.py b/tests/utils/test_hub_utils.py index c1320baadd..aae9bd63cf 100644 --- a/tests/utils/test_hub_utils.py +++ b/tests/utils/test_hub_utils.py @@ -18,6 +18,7 @@ import unittest import unittest.mock as mock from pathlib import Path +from huggingface_hub import hf_hub_download from requests.exceptions import HTTPError from transformers.utils import ( @@ -33,6 +34,7 @@ from transformers.utils import ( RANDOM_BERT = "hf-internal-testing/tiny-random-bert" +TINY_BERT_PT_ONLY = "hf-internal-testing/tiny-bert-pt-only" CACHE_DIR = os.path.join(TRANSFORMERS_CACHE, "models--hf-internal-testing--tiny-random-bert") FULL_COMMIT_HASH = "9b8c223d42b2188cb49d29af482996f9d0f3e5a6" @@ -99,9 +101,20 @@ class GetFromCacheTests(unittest.TestCase): mock_head.assert_called() def test_has_file(self): - self.assertTrue(has_file("hf-internal-testing/tiny-bert-pt-only", WEIGHTS_NAME)) - self.assertFalse(has_file("hf-internal-testing/tiny-bert-pt-only", TF2_WEIGHTS_NAME)) - self.assertFalse(has_file("hf-internal-testing/tiny-bert-pt-only", FLAX_WEIGHTS_NAME)) + self.assertTrue(has_file(TINY_BERT_PT_ONLY, WEIGHTS_NAME)) + self.assertFalse(has_file(TINY_BERT_PT_ONLY, TF2_WEIGHTS_NAME)) + self.assertFalse(has_file(TINY_BERT_PT_ONLY, FLAX_WEIGHTS_NAME)) + + def test_has_file_in_cache(self): + with tempfile.TemporaryDirectory() as tmp_dir: + # Empty cache dir + offline mode => return False + assert not has_file(TINY_BERT_PT_ONLY, WEIGHTS_NAME, local_files_only=True, cache_dir=tmp_dir) + + # Populate cache dir + hf_hub_download(TINY_BERT_PT_ONLY, WEIGHTS_NAME, cache_dir=tmp_dir) + + # Cache dir + offline mode => return True + assert has_file(TINY_BERT_PT_ONLY, WEIGHTS_NAME, local_files_only=True, cache_dir=tmp_dir) def test_get_file_from_repo_distant(self): # `get_file_from_repo` returns None if the file does not exist diff --git a/tests/utils/test_offline.py b/tests/utils/test_offline.py index ecc7938bf3..59ed034201 100644 --- a/tests/utils/test_offline.py +++ b/tests/utils/test_offline.py @@ -14,6 +14,7 @@ import subprocess import sys +from typing import Tuple from transformers import BertConfig, BertModel, BertTokenizer, pipeline from transformers.testing_utils import TestCasePlus, require_torch @@ -56,15 +57,9 @@ socket.socket = offline_socket pipeline(task="fill-mask", model=mname) # baseline - just load from_pretrained with normal network - cmd = [sys.executable, "-c", "\n".join([load, run, mock])] - - # should succeed - env = self.get_env() # should succeed as TRANSFORMERS_OFFLINE=1 tells it to use local files - env["TRANSFORMERS_OFFLINE"] = "1" - result = subprocess.run(cmd, env=env, check=False, capture_output=True) - self.assertEqual(result.returncode, 0, result.stderr) - self.assertIn("success", result.stdout.decode()) + stdout, _ = self._execute_with_env(load, run, mock, TRANSFORMERS_OFFLINE="1") + self.assertIn("success", stdout) @require_torch def test_offline_mode_no_internet(self): @@ -97,13 +92,9 @@ socket.socket = offline_socket pipeline(task="fill-mask", model=mname) # baseline - just load from_pretrained with normal network - cmd = [sys.executable, "-c", "\n".join([load, run, mock])] - # should succeed - env = self.get_env() - result = subprocess.run(cmd, env=env, check=False, capture_output=True) - self.assertEqual(result.returncode, 0, result.stderr) - self.assertIn("success", result.stdout.decode()) + stdout, _ = self._execute_with_env(load, run, mock) + self.assertIn("success", stdout) @require_torch def 
test_offline_mode_sharded_checkpoint(self): @@ -132,27 +123,17 @@ socket.socket = offline_socket """ # baseline - just load from_pretrained with normal network - cmd = [sys.executable, "-c", "\n".join([load, run])] - # should succeed - env = self.get_env() - result = subprocess.run(cmd, env=env, check=False, capture_output=True) - self.assertEqual(result.returncode, 0, result.stderr) - self.assertIn("success", result.stdout.decode()) + stdout, _ = self._execute_with_env(load, run) + self.assertIn("success", stdout) # next emulate no network - cmd = [sys.executable, "-c", "\n".join([load, mock, run])] - # Doesn't fail anymore since the model is in the cache due to other tests, so commenting this. - # env["TRANSFORMERS_OFFLINE"] = "0" - # result = subprocess.run(cmd, env=env, check=False, capture_output=True) - # self.assertEqual(result.returncode, 1, result.stderr) + # self._execute_with_env(load, mock, run, should_fail=True, TRANSFORMERS_OFFLINE="0") # should succeed as TRANSFORMERS_OFFLINE=1 tells it to use local files - env["TRANSFORMERS_OFFLINE"] = "1" - result = subprocess.run(cmd, env=env, check=False, capture_output=True) - self.assertEqual(result.returncode, 0, result.stderr) - self.assertIn("success", result.stdout.decode()) + stdout, _ = self._execute_with_env(load, mock, run, TRANSFORMERS_OFFLINE="1") + self.assertIn("success", stdout) @require_torch def test_offline_mode_pipeline_exception(self): @@ -169,14 +150,11 @@ import socket def offline_socket(*args, **kwargs): raise socket.error("Offline mode is enabled") socket.socket = offline_socket """ - env = self.get_env() - env["TRANSFORMERS_OFFLINE"] = "1" - cmd = [sys.executable, "-c", "\n".join([load, mock, run])] - result = subprocess.run(cmd, env=env, check=False, capture_output=True) - self.assertEqual(result.returncode, 1, result.stderr) + + _, stderr = self._execute_with_env(load, mock, run, should_fail=True, TRANSFORMERS_OFFLINE="1") self.assertIn( "You cannot infer task automatically within `pipeline` when using offline mode", - result.stderr.decode().replace("\n", ""), + stderr.replace("\n", ""), ) @require_torch @@ -191,16 +169,51 @@ print("success") """ # baseline - just load from_pretrained with normal network - cmd = [sys.executable, "-c", "\n".join([load, run])] - # should succeed - env = self.get_env() - result = subprocess.run(cmd, env=env, check=False, capture_output=True) - self.assertEqual(result.returncode, 0, result.stderr) - self.assertIn("success", result.stdout.decode()) + stdout, _ = self._execute_with_env(load, run) + self.assertIn("success", stdout) # should succeed as TRANSFORMERS_OFFLINE=1 tells it to use local files - env["TRANSFORMERS_OFFLINE"] = "1" - result = subprocess.run(cmd, env=env, check=False, capture_output=True) - self.assertEqual(result.returncode, 0, result.stderr) - self.assertIn("success", result.stdout.decode()) + stdout, _ = self._execute_with_env(load, run, TRANSFORMERS_OFFLINE="1") + self.assertIn("success", stdout) + + def test_is_offline_mode(self): + """ + Test `_is_offline_mode` helper (should respect both HF_HUB_OFFLINE and legacy TRANSFORMERS_OFFLINE env vars) + """ + load = "from transformers.utils import is_offline_mode" + run = "print(is_offline_mode())" + + stdout, _ = self._execute_with_env(load, run) + self.assertIn("False", stdout) + + stdout, _ = self._execute_with_env(load, run, TRANSFORMERS_OFFLINE="1") + self.assertIn("True", stdout) + + stdout, _ = self._execute_with_env(load, run, HF_HUB_OFFLINE="1") + self.assertIn("True", stdout) + + def 
_execute_with_env(self, *commands: Tuple[str, ...], should_fail: bool = False, **env) -> Tuple[str, str]: + """Execute Python code with a given environment and return the stdout/stderr as strings. + + If `should_fail=True`, the command is expected to fail. Otherwise, it should succeed. + Environment variables can be passed as keyword arguments. + """ + # Build command + cmd = [sys.executable, "-c", "\n".join(commands)] + + # Configure env + new_env = self.get_env() + new_env.update(env) + + # Run command + result = subprocess.run(cmd, env=new_env, check=False, capture_output=True) + + # Check execution + if should_fail: + self.assertNotEqual(result.returncode, 0, result.stderr) + else: + self.assertEqual(result.returncode, 0, result.stderr) + + # Return output + return result.stdout.decode(), result.stderr.decode() From c8861376adee4c0f962918416dd356fdab552189 Mon Sep 17 00:00:00 2001 From: Huazhong Ji Date: Wed, 29 May 2024 18:57:54 +0800 Subject: [PATCH 26/36] Improve `transformers-cli env` reporting (#31003) * Improve `transformers-cli env` reporting * move the line `"Using GPU in script?": ""` to in if conditional statement * same option for npu --- src/transformers/commands/env.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/transformers/commands/env.py b/src/transformers/commands/env.py index 8567bbcf5b..da9ca6660b 100644 --- a/src/transformers/commands/env.py +++ b/src/transformers/commands/env.py @@ -26,6 +26,7 @@ from ..utils import ( is_safetensors_available, is_tf_available, is_torch_available, + is_torch_npu_available, ) from . import BaseTransformersCLICommand @@ -88,6 +89,7 @@ class EnvironmentCommand(BaseTransformersCLICommand): pt_version = torch.__version__ pt_cuda_available = torch.cuda.is_available() + pt_npu_available = is_torch_npu_available() tf_version = "not installed" tf_cuda_available = "NA" @@ -129,9 +131,15 @@ class EnvironmentCommand(BaseTransformersCLICommand): "Flax version (CPU?/GPU?/TPU?)": f"{flax_version} ({jax_backend})", "Jax version": f"{jax_version}", "JaxLib version": f"{jaxlib_version}", - "Using GPU in script?": "", "Using distributed or parallel set-up in script?": "", } + if pt_cuda_available: + info["Using GPU in script?"] = "" + info["GPU type"] = torch.cuda.get_device_name() + elif pt_npu_available: + info["Using NPU in script?"] = "" + info["NPU type"] = torch.npu.get_device_name() + info["CANN version"] = torch.version.cann print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n") print(self.format_dict(info)) From 97a58a5d2c594d9c80000a30890201d4ddb27889 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 29 May 2024 13:20:36 +0100 Subject: [PATCH 27/36] Fix env.py in cases where torch is not present (#31113) * Fix env.py in cases where torch is not present * Simplify the fix (and avoid some issues) --- src/transformers/commands/env.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/transformers/commands/env.py b/src/transformers/commands/env.py index da9ca6660b..80d8b05e04 100644 --- a/src/transformers/commands/env.py +++ b/src/transformers/commands/env.py @@ -133,13 +133,14 @@ class EnvironmentCommand(BaseTransformersCLICommand): "JaxLib version": f"{jaxlib_version}", "Using distributed or parallel set-up in script?": "", } - if pt_cuda_available: - info["Using GPU in script?"] = "" - info["GPU type"] = torch.cuda.get_device_name() - elif pt_npu_available: - info["Using NPU in script?"] = "" - info["NPU type"] = 
torch.npu.get_device_name() - info["CANN version"] = torch.version.cann + if is_torch_available(): + if pt_cuda_available: + info["Using GPU in script?"] = "" + info["GPU type"] = torch.cuda.get_device_name() + elif pt_npu_available: + info["Using NPU in script?"] = "" + info["NPU type"] = torch.npu.get_device_name() + info["CANN version"] = torch.version.cann print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n") print(self.format_dict(info)) From 0bef4a273825d2cfc52ddfe62ba486ee61cc116f Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 29 May 2024 13:33:26 +0100 Subject: [PATCH 28/36] Fix faulty rstrip in module loading (#31108) --- src/transformers/dynamic_module_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/transformers/dynamic_module_utils.py b/src/transformers/dynamic_module_utils.py index 9de22a3592..b23e9f1717 100644 --- a/src/transformers/dynamic_module_utils.py +++ b/src/transformers/dynamic_module_utils.py @@ -198,7 +198,10 @@ def get_class_in_module(class_name: str, module_path: Union[str, os.PathLike]) - Returns: `typing.Type`: The class looked for. """ - name = os.path.normpath(module_path).rstrip(".py").replace(os.path.sep, ".") + name = os.path.normpath(module_path) + if name.endswith(".py"): + name = name[:-3] + name = name.replace(os.path.sep, ".") module_spec = importlib.util.spec_from_file_location(name, location=Path(HF_MODULES_CACHE) / module_path) module = sys.modules.get(name) if module is None: From d16053c8679be5eed05a8e1bfdb3e0149468e104 Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Wed, 29 May 2024 09:35:37 -0400 Subject: [PATCH 29/36] Rm maintainer + migrate (#31089) --- .github/ISSUE_TEMPLATE/bug-report.yml | 4 ++-- .github/PULL_REQUEST_TEMPLATE.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index ff47109690..7127a71426 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -37,11 +37,11 @@ body: - pipelines: @Narsil - tensorflow: @gante and @Rocketknight1 - tokenizers: @ArthurZucker - - trainer: @muellerzr and @pacman100 + - trainer: @muellerzr @SunMarc Integrations: - - deepspeed: HF Trainer/Accelerate: @pacman100 + - deepspeed: HF Trainer/Accelerate: @muellerzr - ray/raytune: @richardliaw, @amogkam - Big Model Inference: @SunMarc - quantization (bitsandbytes, autogpt): @SunMarc and @younesbelkada diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index c0f70fe815..ebe92b533b 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -51,11 +51,11 @@ Library: - pipelines: @Narsil - tensorflow: @gante and @Rocketknight1 - tokenizers: @ArthurZucker -- trainer: @muellerzr and @pacman100 +- trainer: @muellerzr and @SunMarc Integrations: -- deepspeed: HF Trainer/Accelerate: @pacman100 +- deepspeed: HF Trainer/Accelerate: @muellerzr - ray/raytune: @richardliaw, @amogkam - Big Model Inference: @SunMarc - quantization (bitsandbytes, autogpt): @SunMarc and @younesbelkada From 1f84141391b435a621f009c2ac78397d606fd233 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Wed, 29 May 2024 15:42:39 +0200 Subject: [PATCH 30/36] Fix nightly circleci (#31114) * fix * fix --------- Co-authored-by: ydshieh --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 
9d04cfb941..cdd97f4fce 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -98,7 +98,7 @@ jobs: fetch_all_tests: working_directory: ~/transformers docker: - - image: huggingface/transformers-consistency + - image: huggingface/transformers-quality parallelism: 1 steps: - checkout From cb879c58013f4c77fb792f1938f077f8e11f6453 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Wed, 29 May 2024 15:56:28 +0200 Subject: [PATCH 31/36] FIX / Docs: Fix GPTQ expected number of bits (#31111) Update overview.md --- docs/source/en/quantization/overview.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/quantization/overview.md b/docs/source/en/quantization/overview.md index c6c27edc42..15f9cc1e22 100644 --- a/docs/source/en/quantization/overview.md +++ b/docs/source/en/quantization/overview.md @@ -52,7 +52,7 @@ Use the table below to help you decide which quantization method to use. | [bitsandbytes](./bitsandbytes) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | 🔴 | 4 / 8 | 🟢 | 🟢 | 🟢 | https://github.com/TimDettmers/bitsandbytes | | [EETQ](./eetq) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | ? | 8 | 🟢 | 🟢 | 🟢 | https://github.com/NetEase-FuXi/EETQ | | GGUF / GGML (llama.cpp) | 🟢 | 🟢 | 🟢 | 🔴 | 🟢 | 🔴 | 1 - 8 | 🔴 | [See GGUF section](../gguf) | [See GGUF section](../gguf) | https://github.com/ggerganov/llama.cpp | -| [GPTQ](./gptq) | 🔴 | 🔴 | 🟢 | 🟢 | 🔴 | 🔴 | 4 / 8 | 🟢 | 🟢 | 🟢 | https://github.com/AutoGPTQ/AutoGPTQ | +| [GPTQ](./gptq) | 🔴 | 🔴 | 🟢 | 🟢 | 🔴 | 🔴 | 2 - 3 - 4 - 8 | 🟢 | 🟢 | 🟢 | https://github.com/AutoGPTQ/AutoGPTQ | | [HQQ](./hqq) | 🟢 | 🟢 | 🟢 | 🔴 | 🔴 | 🟢 | 1 - 8 | 🟢 | 🔴 | 🟢 | https://github.com/mobiusml/hqq/ | | [Quanto](./quanto) | 🟢 | 🟢 | 🟢 | 🔴 | 🟢 | 🟢 | 2 / 4 / 8 | 🔴 | 🔴 | 🟢 | https://github.com/huggingface/quanto | From 4af705c6cec5882e907438c8350e9f1659d354e1 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 29 May 2024 15:17:14 +0100 Subject: [PATCH 32/36] Add VLM generation default contributor (#31115) * add Raushan * add Raushan --- .github/ISSUE_TEMPLATE/bug-report.yml | 28 +++++++++++++-------------- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 7127a71426..51d713b2e1 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -17,23 +17,23 @@ body: description: | Your issue will be replied to more quickly if you can figure out the right person to tag with @ If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**. - + All issues are read by one of the core maintainers, so if you don't know who to tag, just leave this blank and a core maintainer will ping the right person. - + Please tag fewer than 3 people. - + Models: - text models: @ArthurZucker and @younesbelkada - vision models: @amyeroberts - speech models: @sanchit-gandhi - graph models: @clefourrier - + Library: - + - flax: @sanchit-gandhi - - generate: @gante + - generate: @zucchini-nlp (visual-language models) or @gante (all others) - pipelines: @Narsil - tensorflow: @gante and @Rocketknight1 - tokenizers: @ArthurZucker @@ -45,22 +45,22 @@ body: - ray/raytune: @richardliaw, @amogkam - Big Model Inference: @SunMarc - quantization (bitsandbytes, autogpt): @SunMarc and @younesbelkada - + Documentation: @stevhliu - + Model hub: - for issues with a model, report at https://discuss.huggingface.co/ and tag the model's creator. 
- + HF projects: - + - accelerate: [different repo](https://github.com/huggingface/accelerate) - datasets: [different repo](https://github.com/huggingface/datasets) - diffusers: [different repo](https://github.com/huggingface/diffusers) - rust tokenizers: [different repo](https://github.com/huggingface/tokenizers) - + Maintained examples (not research project or legacy): - + - Flax: @sanchit-gandhi - PyTorch: See Models above and tag the person corresponding to the modality of the example. - TensorFlow: @Rocketknight1 @@ -101,11 +101,11 @@ body: placeholder: | Steps to reproduce the behavior: - + 1. 2. 3. - + - type: textarea id: expected-behavior diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index ebe92b533b..650e13d8dc 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -47,7 +47,7 @@ Models: Library: - flax: @sanchit-gandhi -- generate: @gante +- generate: @zucchini-nlp (visual-language models) or @gante (all others) - pipelines: @Narsil - tensorflow: @gante and @Rocketknight1 - tokenizers: @ArthurZucker From 5c88253556b7f15cf7d7e9793d7b2a39b4aa588a Mon Sep 17 00:00:00 2001 From: Dhruv Pai <46631243+dhruvbpai@users.noreply.github.com> Date: Wed, 29 May 2024 07:20:59 -0700 Subject: [PATCH 33/36] Add on_optimizer_step to callback options (#31095) * Modified test * Added on_optimizer_step to callbacks * Move callback after step is called * Added on optimizer step callback --- src/transformers/trainer.py | 2 ++ src/transformers/trainer_callback.py | 9 +++++++++ tests/trainer/test_trainer_callback.py | 5 ++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 58e5fd14b6..49e7803066 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -2306,6 +2306,8 @@ class Trainer: self.optimizer.step() + self.control = self.callback_handler.on_optimizer_step(args, self.state, self.control) + optimizer_was_run = not self.accelerator.optimizer_step_was_skipped if optimizer_was_run: # Delay optimizer scheduling until metrics are generated diff --git a/src/transformers/trainer_callback.py b/src/transformers/trainer_callback.py index 45ecf7c80c..207d8ebdff 100644 --- a/src/transformers/trainer_callback.py +++ b/src/transformers/trainer_callback.py @@ -345,6 +345,12 @@ class TrainerCallback: """ pass + def on_optimizer_step(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs): + """ + Event called after the optimizer step but before gradients are zeroed out. Useful for monitoring gradients. + """ + pass + def on_substep_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs): """ Event called at the end of an substep during gradient accumulation. 
@@ -470,6 +476,9 @@ class CallbackHandler(TrainerCallback): control.should_save = False return self.call_event("on_step_begin", args, state, control) + def on_optimizer_step(self, args: TrainingArguments, state: TrainerState, control: TrainerControl): + return self.call_event("on_optimizer_step", args, state, control) + def on_substep_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl): return self.call_event("on_substep_end", args, state, control) diff --git a/tests/trainer/test_trainer_callback.py b/tests/trainer/test_trainer_callback.py index 9eeb1d5e41..edd73f29dc 100644 --- a/tests/trainer/test_trainer_callback.py +++ b/tests/trainer/test_trainer_callback.py @@ -78,6 +78,9 @@ class MyTestTrainerCallback(TrainerCallback): def on_step_begin(self, args, state, control, **kwargs): self.events.append("on_step_begin") + def on_optimizer_step(self, args, state, control, **kwargs): + self.events.append("on_optimizer_step") + def on_step_end(self, args, state, control, **kwargs): self.events.append("on_step_end") @@ -148,7 +151,7 @@ class TrainerCallbackTest(unittest.TestCase): expected_events.append("on_epoch_begin") for _ in range(train_dl_len): step += 1 - expected_events += ["on_step_begin", "on_step_end"] + expected_events += ["on_step_begin", "on_optimizer_step", "on_step_end"] if step % trainer.args.logging_steps == 0: expected_events.append("on_log") if trainer.args.eval_strategy == IntervalStrategy.STEPS and step % trainer.args.eval_steps == 0: From 2b9e252b16396c926dad0e3c31802b4af8004e93 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Wed, 29 May 2024 19:43:51 +0200 Subject: [PATCH 34/36] Cleanup docker build (#31119) * remove * build --------- Co-authored-by: ydshieh --- .github/workflows/build-docker-images.yml | 30 ------------------- .../build-nightly-ci-docker-images.yml | 4 +-- .../workflows/build-past-ci-docker-images.yml | 4 +-- 3 files changed, 4 insertions(+), 34 deletions(-) diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index f113579691..df772db773 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -70,16 +70,6 @@ jobs: name: "Latest PyTorch + DeepSpeed" runs-on: [intel-cpu, 8-cpu, ci] steps: - - name: Cleanup disk - run: | - sudo ls -l /usr/local/lib/ - sudo ls -l /usr/share/ - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/share/dotnet - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -116,16 +106,6 @@ jobs: name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)" runs-on: [intel-cpu, 8-cpu, ci] steps: - - name: Cleanup disk - run: | - sudo ls -l /usr/local/lib/ - sudo ls -l /usr/share/ - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/share/dotnet - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -202,16 +182,6 @@ jobs: if: inputs.image_postfix != '-push-ci' runs-on: [intel-cpu, 8-cpu, ci] steps: - - name: Cleanup disk - run: | - sudo ls -l /usr/local/lib/ - sudo ls -l /usr/share/ - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/share/dotnet - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 
diff --git a/.github/workflows/build-nightly-ci-docker-images.yml b/.github/workflows/build-nightly-ci-docker-images.yml index d7c18775a8..691369c765 100644 --- a/.github/workflows/build-nightly-ci-docker-images.yml +++ b/.github/workflows/build-nightly-ci-docker-images.yml @@ -13,7 +13,7 @@ concurrency: jobs: latest-with-torch-nightly-docker: name: "Nightly PyTorch + Stable TensorFlow" - runs-on: ubuntu-22.04 + runs-on: [intel-cpu, 8-cpu, ci] steps: - name: Cleanup disk run: | @@ -50,7 +50,7 @@ jobs: nightly-torch-deepspeed-docker: name: "Nightly PyTorch + DeepSpeed" - runs-on: ubuntu-22.04 + runs-on: [intel-cpu, 8-cpu, ci] steps: - name: Cleanup disk run: | diff --git a/.github/workflows/build-past-ci-docker-images.yml b/.github/workflows/build-past-ci-docker-images.yml index 5ef7c7e7de..6ee60b8a6b 100644 --- a/.github/workflows/build-past-ci-docker-images.yml +++ b/.github/workflows/build-past-ci-docker-images.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: version: ["1.13", "1.12", "1.11"] - runs-on: ubuntu-22.04 + runs-on: [intel-cpu, 8-cpu, ci] steps: - name: Set up Docker Buildx @@ -60,7 +60,7 @@ jobs: fail-fast: false matrix: version: ["2.11", "2.10", "2.9", "2.8", "2.7", "2.6", "2.5"] - runs-on: ubuntu-22.04 + runs-on: [intel-cpu, 8-cpu, ci] steps: - name: Set up Docker Buildx From 5e5c4d629d8bcd9d5b2b4fa859a2bbdbb0011e36 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Thu, 30 May 2024 11:45:03 +0200 Subject: [PATCH 35/36] FIX / Quantization: Add extra validation for bnb config (#31135) add validation for bnb config --- src/transformers/utils/quantization_config.py | 4 ++++ tests/quantization/bnb/test_4bit.py | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/src/transformers/utils/quantization_config.py b/src/transformers/utils/quantization_config.py index f9e503cf86..6236827de3 100755 --- a/src/transformers/utils/quantization_config.py +++ b/src/transformers/utils/quantization_config.py @@ -383,6 +383,10 @@ class BitsAndBytesConfig(QuantizationConfigMixin): if bnb_4bit_quant_storage is None: self.bnb_4bit_quant_storage = torch.uint8 elif isinstance(bnb_4bit_quant_storage, str): + if bnb_4bit_quant_storage not in ["float16", "float32", "int8", "uint8", "float64", "bfloat16"]: + raise ValueError( + "`bnb_4bit_quant_storage` must be a valid string (one of 'float16', 'float32', 'int8', 'uint8', 'float64', 'bfloat16') " + ) self.bnb_4bit_quant_storage = getattr(torch, bnb_4bit_quant_storage) elif isinstance(bnb_4bit_quant_storage, torch.dtype): self.bnb_4bit_quant_storage = bnb_4bit_quant_storage diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py index 443b1020a3..ac17979d17 100644 --- a/tests/quantization/bnb/test_4bit.py +++ b/tests/quantization/bnb/test_4bit.py @@ -303,6 +303,13 @@ class Bnb4BitTest(Base4bitTest): model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small", load_in_4bit=True, device_map="auto") self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32) + def test_bnb_4bit_wrong_config(self): + r""" + Test whether creating a bnb config with unsupported values leads to errors. 
+ """ + with self.assertRaises(ValueError): + _ = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_storage="add") + @require_bitsandbytes @require_accelerate From cda9c82a6362d7bcb5dc998ac2097473df4467bb Mon Sep 17 00:00:00 2001 From: zspo Date: Thu, 30 May 2024 22:25:43 +0800 Subject: [PATCH 36/36] fix get_scheduler when name is warmup_stable_decay (#31128) fix get_scheduler args --- src/transformers/optimization.py | 3 +++ tests/optimization/test_optimization.py | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/src/transformers/optimization.py b/src/transformers/optimization.py index 79a2c71c38..a462e3d824 100644 --- a/src/transformers/optimization.py +++ b/src/transformers/optimization.py @@ -540,6 +540,9 @@ def get_scheduler( if name == SchedulerType.INVERSE_SQRT: return schedule_func(optimizer, num_warmup_steps=num_warmup_steps) + if name == SchedulerType.WARMUP_STABLE_DECAY: + return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, **scheduler_specific_kwargs) + # All other schedulers require `num_training_steps` if num_training_steps is None: raise ValueError(f"{name} requires `num_training_steps`, please provide that argument.") diff --git a/tests/optimization/test_optimization.py b/tests/optimization/test_optimization.py index 6d6707db5a..5240b03779 100644 --- a/tests/optimization/test_optimization.py +++ b/tests/optimization/test_optimization.py @@ -36,6 +36,7 @@ if is_torch_available(): get_inverse_sqrt_schedule, get_linear_schedule_with_warmup, get_polynomial_decay_schedule_with_warmup, + get_scheduler, get_wsd_schedule, ) @@ -176,6 +177,27 @@ class ScheduleInitTest(unittest.TestCase): lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps) self.assertListEqual(lrs_1, lrs_2, msg=f"failed for {scheduler_func} in save and reload") + def test_get_scheduler(self): + test_params = [ + { + "name": "warmup_stable_decay", + "optimizer": self.optimizer, + "num_warmup_steps": 2, + "scheduler_specific_kwargs": {"num_stable_steps": 1, "num_decay_steps": 3}, + }, + { + "name": "warmup_stable_decay", + "optimizer": self.optimizer, + "num_warmup_steps": 2, + "num_training_steps": 10, + "scheduler_specific_kwargs": {"num_stable_steps": 1, "num_decay_steps": 3}, + }, + {"name": "cosine", "optimizer": self.optimizer, "num_warmup_steps": 2, "num_training_steps": 10}, + ] + + for param in test_params: + self.assertTrue(get_scheduler(**param), msg=f"failed for {param['name']} in get_scheduler") + class LambdaScheduleWrapper: """See https://github.com/huggingface/transformers/issues/21689"""
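
A few hedged usage sketches for the API changes introduced in the patches above follow; they are not part of the patch series, and any names not found in the diffs (such as `GradNormMonitor`) are illustrative.

First, the `on_optimizer_step` event added in PATCH 33 fires after `optimizer.step()` but before gradients are zeroed, which the new docstring describes as useful for monitoring gradients. A minimal sketch of a callback hooking that event, assuming the handler forwards `model` in `**kwargs` as it does for the other events:

```python
from transformers import TrainerCallback


class GradNormMonitor(TrainerCallback):
    """Illustrative callback: logs the global gradient norm right after each optimizer step."""

    def on_optimizer_step(self, args, state, control, **kwargs):
        # Gradients are still populated here; zeroing happens only after this event.
        model = kwargs.get("model")  # assumption: `model` is forwarded as for other events
        if model is None:
            return
        sq_sum = 0.0
        for p in model.parameters():
            if p.grad is not None:
                sq_sum += p.grad.detach().norm(2).item() ** 2
        print(f"step {state.global_step}: grad norm = {sq_sum ** 0.5:.4f}")


# Usage (illustrative): Trainer(model=..., args=..., callbacks=[GradNormMonitor()])
```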
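
Next, the extra validation added in PATCH 35: previously a string value for `bnb_4bit_quant_storage` went straight through `getattr(torch, ...)`, so `"add"` resolved to `torch.add` rather than a storage dtype; with the patch the config constructor rejects it with a `ValueError` up front. A small sketch of both outcomes:

```python
from transformers import BitsAndBytesConfig

# Accepted: one of the recognized dtype strings
config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_storage="uint8")

# Rejected early: "add" is not a valid storage dtype, even though `torch.add` exists
try:
    BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_storage="add")
except ValueError as err:
    print(err)
```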
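
Finally, the `get_scheduler` fix in PATCH 36: the new branch dispatches `warmup_stable_decay` with only `num_warmup_steps` plus the entries of `scheduler_specific_kwargs`, before the generic check that demands `num_training_steps`. A runnable sketch mirroring the parameters used by the new test (the toy model, learning rate, and loop length are illustrative):

```python
import torch
from transformers import get_scheduler

model = torch.nn.Linear(2, 2)  # toy module so the optimizer has parameters to track
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

# `num_training_steps` can now be omitted for this schedule; the warmup/stable/decay
# lengths are passed through `scheduler_specific_kwargs` to the underlying WSD schedule.
scheduler = get_scheduler(
    name="warmup_stable_decay",
    optimizer=optimizer,
    num_warmup_steps=2,
    scheduler_specific_kwargs={"num_stable_steps": 1, "num_decay_steps": 3},
)

for _ in range(6):  # 2 warmup + 1 stable + 3 decay steps
    optimizer.step()
    scheduler.step()
```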