CI with `num_hidden_layers=2` 🚀🚀🚀 (#25266)

* CI with layers=2

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Yih-Dar, 2023-08-02 20:22:36 +02:00 (committed by GitHub)
parent b28ebb2655
commit bd90cda9a6
147 changed files with 207 additions and 196 deletions
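The change is mechanical: in (almost) every model tester, the default depth (`num_hidden_layers`, `decoder_layers`, and friends) drops to 2, the smallest stack that still exercises inter-layer wiring, so CI builds far smaller models. As a rough illustration, here is a minimal sketch (not part of the diff, reusing `BertModelTester`-style numbers) of how much the tiny test model shrinks:

```python
# Hypothetical illustration only: compare parameter counts of the tiny
# test configuration at the old depth (5) and the new depth (2).
from transformers import BertConfig, BertModel

def tiny_bert(num_hidden_layers):
    config = BertConfig(
        vocab_size=99,
        hidden_size=32,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=4,
        intermediate_size=37,
    )
    return BertModel(config)

for depth in (5, 2):
    n_params = sum(p.numel() for p in tiny_bert(depth).parameters())
    print(depth, n_params)  # fewer layers -> fewer parameters -> faster tests
```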

@@ -54,8 +54,9 @@ class AlbertModelTester:
         vocab_size=99,
         embedding_size=16,
         hidden_size=36,
-        num_hidden_layers=6,
-        num_hidden_groups=6,
+        num_hidden_layers=2,
+        # this needs to be the same as `num_hidden_layers`!
+        num_hidden_groups=2,
         num_attention_heads=6,
         intermediate_size=37,
         hidden_act="gelu",
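For ALBERT the depth cannot change alone, hence the new comment: layers are shared across `num_hidden_groups`, and the forward pass maps layer `i` to group `int(i / (num_hidden_layers / num_hidden_groups))`, so the group count must evenly divide the layer count. A sketch of that mapping (hypothetical helper, mirroring `AlbertTransformer.forward`):

```python
# Hypothetical helper mirroring ALBERT's layer-to-group assignment.
def layer_to_group(i: int, num_hidden_layers: int, num_hidden_groups: int) -> int:
    return int(i / (num_hidden_layers / num_hidden_groups))

print([layer_to_group(i, 2, 2) for i in range(2)])  # [0, 1] -- valid
print([layer_to_group(i, 6, 6) for i in range(6)])  # old defaults: [0, 1, 2, 3, 4, 5]
# With layers=2 but groups=6 the per-group share is fractional and the mapping breaks.
```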

@@ -48,7 +48,7 @@ class FlaxAlbertModelTester(unittest.TestCase):
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -242,7 +242,7 @@ class AlignTextModelTester:
         use_token_type_ids=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -60,7 +60,7 @@ class AltCLIPVisionModelTester:
         is_training=True,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -212,7 +212,7 @@ class AltCLIPTextModelTester:
         hidden_size=32,
         projection_dim=32,
         project_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,

@@ -55,7 +55,7 @@ class ASTModelTester:
         is_training=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -1289,7 +1289,7 @@ class BartStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,

@@ -64,7 +64,7 @@ class BeitModelTester:
         is_training=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -48,7 +48,7 @@ class FlaxBeitModelTester(unittest.TestCase):
         is_training=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -57,7 +57,7 @@ class BertModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -47,7 +47,7 @@ class FlaxBertModelTester(unittest.TestCase):
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -41,7 +41,7 @@ class BertGenerationEncoderTester:
         use_input_mask=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -605,7 +605,7 @@ class BigBirdPegasusStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,

@@ -51,7 +51,7 @@ class BioGptModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -356,7 +356,7 @@ class BlenderbotStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,

@@ -365,7 +365,7 @@ class BlenderbotSmallStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,

@@ -70,7 +70,7 @@ class BlipVisionModelTester:
         is_training=True,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -221,7 +221,7 @@ class BlipTextModelTester:
         vocab_size=99,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,

@@ -44,7 +44,7 @@ class BlipTextModelTester:
         vocab_size=99,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,

@@ -62,7 +62,7 @@ class Blip2VisionModelTester:
         is_training=True,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -215,7 +215,7 @@ class Blip2QFormerModelTester:
         vocab_size=99,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=6,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -289,7 +289,7 @@ class Blip2TextModelDecoderOnlyTester:
         use_labels=False,
         vocab_size=99,
         hidden_size=16,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=4,
         hidden_act="gelu",
@@ -503,7 +503,7 @@ class Blip2TextModelTester:
         use_attention_mask=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,

@@ -54,7 +54,7 @@ class BloomModelTester:
         use_mc_token_ids=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -53,7 +53,7 @@ class CanineModelTester:
         # NOTE: this is not a model parameter, just an input
         vocab_size=100000,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -69,7 +69,7 @@ class ChineseCLIPTextModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -246,7 +246,7 @@ class ChineseCLIPVisionModelTester:
         is_training=True,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,

@@ -287,7 +287,7 @@ class ClapTextModelTester:
         vocab_size=99,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,

@@ -86,7 +86,7 @@ class CLIPVisionModelTester:
         is_training=True,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -261,7 +261,7 @@ class CLIPTextModelTester:
         vocab_size=99,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,

@@ -35,7 +35,7 @@ class FlaxCLIPVisionModelTester:
         num_channels=3,
         is_training=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -252,7 +252,7 @@ class FlaxCLIPTextModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,

@@ -78,7 +78,7 @@ class CLIPSegVisionModelTester:
         num_channels=3,
         is_training=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -228,7 +228,7 @@ class CLIPSegTextModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -346,7 +346,15 @@ class CLIPSegTextModelTest(ModelTesterMixin, unittest.TestCase):
 class CLIPSegModelTester:
-    def __init__(self, parent, text_kwargs=None, vision_kwargs=None, is_training=True):
+    def __init__(
+        self,
+        parent,
+        text_kwargs=None,
+        vision_kwargs=None,
+        is_training=True,
+        # This should respect the `num_hidden_layers` in `CLIPSegVisionModelTester`
+        extract_layers=(1,),
+    ):
         if text_kwargs is None:
             text_kwargs = {}
         if vision_kwargs is None:
             vision_kwargs = {}
 
@@ -356,6 +364,7 @@ class CLIPSegModelTester:
         self.text_model_tester = CLIPSegTextModelTester(parent, **text_kwargs)
         self.vision_model_tester = CLIPSegVisionModelTester(parent, **vision_kwargs)
         self.is_training = is_training
+        self.extract_layers = extract_layers
 
     def prepare_config_and_inputs(self):
         text_config, input_ids, attention_mask = self.text_model_tester.prepare_config_and_inputs()
@@ -371,7 +380,7 @@ class CLIPSegModelTester:
             self.vision_model_tester.get_config(),
             projection_dim=64,
             reduce_dim=32,
-            extract_layers=[1, 2, 3],
+            extract_layers=self.extract_layers,
         )
 
     def create_and_check_model(self, config, input_ids, attention_mask, pixel_values):
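The CLIPSeg tester needs more than a value swap because `extract_layers` indexes into the vision encoder's hidden states, and with `output_hidden_states=True` an encoder returns `num_hidden_layers + 1` entries (the embeddings plus one per layer). A small sketch of the bound the new comment refers to (plain Python, stand-ins for the real tensors):

```python
# With 2 hidden layers there are 3 hidden states, valid indices 0..2.
num_hidden_layers = 2
hidden_states = [f"state_{i}" for i in range(num_hidden_layers + 1)]

assert all(i < len(hidden_states) for i in (1,))           # new default: in range
assert not all(i < len(hidden_states) for i in (1, 2, 3))  # old [1, 2, 3]: index 3 overflows
```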

@@ -47,7 +47,7 @@ class CodeGenModelTester:
         vocab_size=256,
         hidden_size=32,
         rotary_dim=4,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -53,7 +53,7 @@ class ConvBertModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -49,7 +49,7 @@ class CpmAntModelTester:
         use_mc_token_ids=False,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=3,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         num_buckets=32,

@@ -49,7 +49,7 @@ class CTRLModelTester:
         use_mc_token_ids=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -59,7 +59,7 @@ class Data2VecAudioModelTester:
         conv_bias=False,
         num_conv_pos_embeddings=16,
         num_conv_pos_embedding_groups=2,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=2,
         hidden_dropout_prob=0.1,
         intermediate_size=20,

@@ -57,7 +57,7 @@ class Data2VecTextModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -59,7 +59,7 @@ class Data2VecVisionModelTester:
         is_training=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -47,7 +47,7 @@ class DebertaModelTester(object):
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -48,7 +48,7 @@ class DebertaV2ModelTester(object):
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -69,7 +69,7 @@ class DeiTModelTester:
         is_training=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -57,7 +57,7 @@ class Dinov2ModelTester:
         is_training=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -50,7 +50,7 @@ class DistilBertModelTester(object):
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -47,7 +47,7 @@ class FlaxDistilBertModelTester(unittest.TestCase):
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -48,7 +48,7 @@ class DPRModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -53,7 +53,7 @@ class DPTModelTester:
         is_training=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         backbone_out_indices=[0, 1, 2, 3],
         num_attention_heads=4,
         intermediate_size=37,
@@ -62,7 +62,7 @@ class DPTModelTester:
         attention_probs_dropout_prob=0.1,
         initializer_range=0.02,
         num_labels=3,
-        neck_hidden_sizes=[16, 16, 32, 32],
+        neck_hidden_sizes=[16, 32],
         is_hybrid=False,
         scope=None,
     ):

@@ -54,7 +54,7 @@ class ElectraModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -34,7 +34,7 @@ class FlaxElectraModelTester(unittest.TestCase):
         vocab_size=99,
         embedding_size=24,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -56,7 +56,7 @@ class ErnieModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -50,7 +50,7 @@ class ErnieMModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -49,7 +49,7 @@ class EsmModelTester:
         use_labels=True,
         vocab_size=33,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -43,7 +43,7 @@ class EsmFoldModelTester:
         use_labels=False,
         vocab_size=19,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -50,7 +50,7 @@ class FalconModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -57,7 +57,7 @@ class FlaubertModelTester(object):
         vocab_size=99,
         n_special=0,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         hidden_dropout_prob=0.1,
         attention_probs_dropout_prob=0.1,

@@ -79,7 +79,7 @@ class FlavaImageModelTester:
         parent,
         batch_size=12,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -342,7 +342,7 @@ class FlavaTextModelTester:
         max_position_embeddings=512,
         position_embedding_type="absolute",
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -487,7 +487,7 @@ class FlavaMultimodalModelTester:
         seq_length=44,
         use_input_mask=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -70,7 +70,7 @@ class FNetModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         intermediate_size=37,
         hidden_act="gelu",
         hidden_dropout_prob=0.1,

@@ -51,7 +51,7 @@ class GitVisionModelTester:
         is_training=True,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -203,7 +203,7 @@ class GitModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -52,7 +52,7 @@ class FlaxGPT2ModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -56,7 +56,7 @@ class GPT2ModelTester:
         use_mc_token_ids=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -55,7 +55,7 @@ class GPTBigCodeModelTester:
         use_mc_token_ids=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="relu",

@@ -52,9 +52,9 @@ class FlaxGPTNeoModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=4,
-        attention_types=[[["global", "local"], 2]],
+        attention_types=[[["global", "local"], 1]],
         intermediate_size=37,
         hidden_act="gelu",
         hidden_dropout_prob=0.1,

@@ -54,8 +54,8 @@ class GPTNeoModelTester:
         use_mc_token_ids=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=4,
-        attention_types=[[["global", "local"], 2]],
+        num_hidden_layers=2,
+        attention_types=[[["global", "local"], 1]],
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
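Both GPT-Neo hunks change `attention_types` together with the depth because the config expands the pattern into one attention type per layer and requires the expanded list to match `num_hidden_layers`. A sketch of that expansion (mirroring `GPTNeoConfig.expand_attention_types_params`):

```python
# Each [pattern, k] entry repeats the pattern k times; the flattened list
# must have exactly num_hidden_layers entries.
def expand_attention_types(attention_types):
    attentions = []
    for pattern, repeats in attention_types:
        for _ in range(repeats):
            attentions.extend(pattern)
    return attentions

print(expand_attention_types([[["global", "local"], 2]]))  # 4 entries -> needed 4 layers
print(expand_attention_types([[["global", "local"], 1]]))  # 2 entries -> matches layers=2
```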

@@ -52,7 +52,7 @@ class GPTNeoXModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=64,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -44,7 +44,7 @@ class GPTNeoXJapaneseModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_multiple_size=4,
         hidden_act="gelu",

@@ -53,7 +53,7 @@ class FlaxGPTJModelTester:
         vocab_size=99,
         hidden_size=32,
         rotary_dim=4,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -56,7 +56,7 @@ class GPTJModelTester:
         vocab_size=99,
         hidden_size=32,
         rotary_dim=4,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -45,7 +45,7 @@ class GPTSanJapaneseTester:
         is_training=True,
         hidden_size=32,
         ext_size=42,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_ext_layers=2,
         num_attention_heads=4,
         num_experts=2,

@@ -356,7 +356,7 @@ class GroupViTTextModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -553,6 +553,10 @@ class GroupViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     def test_model_common_attributes(self):
         pass
 
+    # overwritten from parent as this equivalent test needs a specific `seed` and hard to get a good one!
+    def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=2e-5, name="outputs", attributes=None):
+        super().check_pt_tf_outputs(tf_outputs, pt_outputs, model_class, tol=tol, name=name, attributes=attributes)
+
     @is_pt_tf_cross_test
     def test_pt_tf_model_equivalence(self):
         import tensorflow as tf

@@ -71,7 +71,7 @@ class HubertModelTester:
         conv_bias=False,
         num_conv_pos_embeddings=16,
         num_conv_pos_embedding_groups=2,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=2,
         hidden_dropout_prob=0.1,  # this is most likely not correctly set yet
         intermediate_size=20,

@@ -62,7 +62,7 @@ class IBertModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -65,7 +65,7 @@ class ImageGPTModelTester:
         use_mc_token_ids=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -64,7 +64,7 @@ class InstructBlipVisionModelTester:
         is_training=True,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -219,7 +219,7 @@ class InstructBlipQFormerModelTester:
         vocab_size=99,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=6,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -295,7 +295,7 @@ class InstructBlipTextModelDecoderOnlyTester:
         use_labels=False,
         vocab_size=99,
         hidden_size=16,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=4,
         hidden_act="gelu",

@@ -48,7 +48,7 @@ class LayoutLMModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -55,7 +55,7 @@ class LayoutLMv2ModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=36,
-        num_hidden_layers=3,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -63,7 +63,7 @@ class LayoutLMv3ModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=36,
-        num_hidden_layers=3,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -46,7 +46,7 @@ class LlamaModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -50,7 +50,7 @@ class LongformerModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -71,7 +71,7 @@ class FlaxLongT5ModelTester:
         use_attention_mask=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,

@@ -59,7 +59,7 @@ class LongT5ModelTester:
         use_attention_mask=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,
@@ -916,7 +916,7 @@ class LongT5EncoderOnlyModelTester:
         # For common tests
         use_attention_mask=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,

@@ -61,7 +61,7 @@ class LukeModelTester:
         entity_vocab_size=10,
         entity_emb_size=6,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -661,7 +661,7 @@ class MarianStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,

@@ -53,7 +53,7 @@ class MarkupLMModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -491,7 +491,7 @@ class MBartStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,

@@ -51,7 +51,7 @@ class MegaModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         intermediate_size=37,
         hidden_dropout_prob=0.1,
         attention_probs_dropout_prob=0.1,

@@ -58,7 +58,7 @@ class MegatronBertModelTester:
         vocab_size=99,
         hidden_size=64,
         embedding_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -55,7 +55,7 @@ class MgpstrModelTester:
         num_bpe_labels=99,
         num_wordpiece_labels=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         mlp_ratio=4.0,
         patch_embeds_hidden_size=257,

@@ -54,7 +54,7 @@ class MobileBertModelTester:
         vocab_size=99,
         hidden_size=64,
         embedding_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -49,7 +49,7 @@ class MPNetModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=64,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=64,
         hidden_act="gelu",

@@ -54,7 +54,7 @@ class MptModelTester:
         use_mc_token_ids=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -51,7 +51,7 @@ class MraModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=16,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=2,
         intermediate_size=36,
         hidden_act="gelu",

@@ -595,7 +595,7 @@ class MvpStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,

@@ -55,7 +55,7 @@ class NezhaModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -52,7 +52,7 @@ class NllbMoeModelTester:
         use_labels=False,
         vocab_size=99,
         hidden_size=16,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=4,
         hidden_act="relu",

@@ -51,7 +51,7 @@ class NystromformerModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -49,7 +49,7 @@ class OpenAIGPTModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -70,7 +70,7 @@ class OPTModelTester:
         use_labels=False,
         vocab_size=99,
         hidden_size=16,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=4,
         hidden_act="gelu",

@@ -62,7 +62,7 @@ class OwlViTVisionModelTester:
         num_channels=3,
         is_training=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,

@@ -52,7 +52,7 @@ class FlaxPegasusModelTester:
         use_labels=False,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_dropout_prob=0.1,

@@ -371,7 +371,7 @@ class PegasusStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,

@@ -670,7 +670,7 @@ class PegasusXStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,

@@ -71,7 +71,7 @@ class Pix2StructVisionModelTester:
         patch_embed_hidden_size=12,
         projection_dim=32,
         max_patches=64,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -230,7 +230,7 @@ class Pix2StructTextModelTester:
         vocab_size=99,
         hidden_size=12,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,

@@ -473,7 +473,7 @@ class PLBartStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,

@@ -55,10 +55,10 @@ class ProphetNetModelTester:
         use_labels=True,
         decoder_start_token_id=0,
         encoder_ffn_dim=32,
-        num_encoder_layers=4,
+        num_encoder_layers=2,
         num_encoder_attention_heads=4,
         decoder_ffn_dim=32,
-        num_decoder_layers=4,
+        num_decoder_layers=2,
         num_decoder_attention_heads=4,
         max_position_embeddings=30,
         is_encoder_decoder=True,
@@ -437,10 +437,10 @@ class ProphetNetModelTester:
             decoder_attention_mask=decoder_attention_mask,
             labels=lm_labels,
         )
-        self.parent.assertTrue(torch.allclose(result.loss, torch.tensor(4.5981, device=torch_device), atol=1e-3))
+        self.parent.assertTrue(torch.allclose(result.loss, torch.tensor(4.5892, device=torch_device), atol=1e-3))
 
         expected_logit_slice = torch.tensor(
-            [-0.0648, 0.0790, 0.0360, 0.0089, 0.0039, -0.0639, 0.0131], device=torch_device
+            [-0.0184, 0.0758, -0.0543, -0.0093, 0.0050, -0.0660, -0.1453], device=torch_device
         )
         self.parent.assertTrue(torch.allclose(result.logits[0, :, 1], expected_logit_slice, atol=1e-3))
 
@@ -551,10 +551,10 @@ class ProphetNetStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=0,
         encoder_ffn_dim=32,
-        num_encoder_layers=4,
+        num_encoder_layers=2,
         num_encoder_attention_heads=4,
         decoder_ffn_dim=32,
-        num_decoder_layers=4,
+        num_decoder_layers=2,
         num_decoder_attention_heads=4,
         max_position_embeddings=30,
         is_encoder_decoder=False,
@@ -782,10 +782,10 @@ class ProphetNetStandaloneEncoderModelTester:
         use_labels=True,
         decoder_start_token_id=0,
         encoder_ffn_dim=32,
-        num_encoder_layers=4,
+        num_encoder_layers=2,
         num_encoder_attention_heads=4,
         decoder_ffn_dim=32,
-        num_decoder_layers=4,
+        num_decoder_layers=2,
         num_decoder_attention_heads=4,
         max_position_embeddings=30,
         is_encoder_decoder=False,
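ProphetNet is the one place where hard-coded expectations had to move: the recorded loss and logit slice come from a seeded tiny model, so shrinking it from 4+4 to 2+2 layers changes the numbers while the assertion pattern stays the same. A sketch of that pattern (made-up values, same `atol` as the test):

```python
import torch

# Hypothetical values: the recorded target is regenerated whenever the tiny
# model's shape changes; the check itself is a plain tolerance comparison.
recorded = torch.tensor([-0.0184, 0.0758, -0.0543])
computed = torch.tensor([-0.0184, 0.0758, -0.0543])
assert torch.allclose(computed, recorded, atol=1e-3)
```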

@@ -54,7 +54,7 @@ class QDQBertModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -54,7 +54,7 @@ class RealmModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

@@ -55,7 +55,7 @@ class RemBertModelTester:
         hidden_size=32,
         input_embedding_size=18,
         output_embedding_size=43,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",

Some files were not shown because too many files have changed in this diff.