[cleanup] Hoist ModelTester objects to top level (#4939)
Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
This commit is contained in:
parent  0c55a384f8
commit  c852036b4a
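Every file touched below gets the same mechanical treatment: the ModelTester helper that used to be nested inside a @require_torch / @require_tf test case is moved to module level, its keyword-argument constructor is replaced (fully or partly) by hard-coded assignments of the former default values, and the unittest test case is re-declared after it with setUp pointing at the top-level name. A minimal sketch of the shape of the change, using a hypothetical Foo model rather than any class from this diff:

# Before: tester nested inside the test case, every value a keyword argument.
@require_torch
class FooModelTest(ModelTesterMixin, unittest.TestCase):

    class FooModelTester(object):
        def __init__(self, parent, batch_size=13, seq_length=7):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length

    def setUp(self):
        self.model_tester = FooModelTest.FooModelTester(self)

# After: tester hoisted to module level, defaults inlined, test case declared after it.
class FooModelTester:
    def __init__(self, parent):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7

@require_torch
class FooModelTest(ModelTesterMixin, unittest.TestCase):
    def setUp(self):
        self.model_tester = FooModelTester(self)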
Tests for ALBERT (AlbertModelTester / AlbertModelTest)

@@ -37,75 +37,34 @@ if is_torch_available():
      from transformers.modeling_albert import ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST

AlbertModelTester is hoisted out of AlbertModelTest to module level. The old nested class AlbertModelTester(object) accepted every setting as a keyword argument (self, parent, batch_size=13, seq_length=7, is_training=True, use_input_mask=True, use_token_type_ids=True, use_labels=True, vocab_size=99, embedding_size=16, hidden_size=36, num_hidden_layers=6, num_hidden_groups=6, num_attention_heads=6, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None) and forwarded each one onto self. The new top-level class AlbertModelTester takes only (self, parent) and assigns those former defaults directly, e.g.:

-        self.batch_size = batch_size
+        self.batch_size = 13

@@ -253,16 +212,12 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
In the AlbertForTokenClassification check, the wrapped calls are collapsed onto single lines:

-        loss, logits = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
-        )
+        loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)

-        self.parent.assertListEqual(
-            list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
-        )
+        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels])

@@ -286,7 +241,6 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
In create_and_check_albert_for_multiple_choice, the trailing self.check_loss_output(result) call after the [batch_size, num_choices] shape assertion is removed.

@@ -302,8 +256,26 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
The @require_torch class AlbertModelTest(ModelTesterMixin, unittest.TestCase), with all_model_classes = (AlbertModel, AlbertForPreTraining, AlbertForMaskedLM, AlbertForMultipleChoice, AlbertForSequenceClassification, AlbertForTokenClassification, AlbertForQuestionAnswering) if is_torch_available() else (), is re-declared after the tester, and setUp now instantiates the top-level class:

-        self.model_tester = AlbertModelTest.AlbertModelTester(self)
+        self.model_tester = AlbertModelTester(self)
         self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37)
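One practical consequence of the hoist (an inference about intent, not something stated in the commit): a module-level tester can be imported and reused by other test modules, which is awkward for a class nested inside a TestCase. A hypothetical sketch, assuming the test files are importable as modules and using the prepare_config_and_inputs_for_common method shown in the hunk above:

import unittest

# Hypothetical module path; the real layout of the test package is not shown in this diff.
from test_modeling_albert import AlbertModelTester


class ReusesAlbertTester(unittest.TestCase):
    def setUp(self):
        # The tester only needs a "parent" exposing assert* helpers, i.e. the test case itself.
        self.albert_tester = AlbertModelTester(self)

    def test_common_inputs_have_input_ids(self):
        config, inputs_dict = self.albert_tester.prepare_config_and_inputs_for_common()
        self.assertIn("input_ids", inputs_dict)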
Tests for CTRL (CTRLModelTester / CTRLModelTest)

@@ -27,66 +27,33 @@ if is_torch_available():
      from transformers import CTRLConfig, CTRLModel, CTRL_PRETRAINED_MODEL_ARCHIVE_LIST, CTRLLMHeadModel

Same hoist as for ALBERT: the nested class CTRLModelTester(object) becomes a top-level class CTRLModelTester whose __init__ takes only (self, parent) and assigns the former defaults directly (batch_size=14, seq_length=7, is_training=True, use_token_type_ids=True, use_input_mask=True, use_labels=True, use_mc_token_ids=True, vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None).

@@ -193,8 +160,19 @@ class CTRLModelTest(ModelTesterMixin, unittest.TestCase):
The @require_torch class CTRLModelTest(ModelTesterMixin, unittest.TestCase) is re-declared after the tester, keeping its flags:

     all_model_classes = (CTRLModel, CTRLLMHeadModel) if is_torch_available() else ()
     all_generative_model_classes = (CTRLLMHeadModel,) if is_torch_available() else ()
     test_pruning = True
     test_torchscript = False
     test_resize_embeddings = False
     test_head_masking = False

and setUp now reads:

-        self.model_tester = CTRLModelTest.CTRLModelTester(self)
+        self.model_tester = CTRLModelTester(self)
         self.config_tester = ConfigTester(self, config_class=CTRLConfig, n_embd=37)
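A side effect of inlining the defaults, worth keeping in mind while reading the remaining hunks (my reading of the diff, not a claim from the commit message): where the keyword parameters are dropped from the signature, the values can no longer be overridden at construction time. A small sketch of the difference:

class OldStyleTester:
    def __init__(self, parent, batch_size=14):
        self.parent = parent
        self.batch_size = batch_size      # OldStyleTester(self, batch_size=2) is allowed


class NewStyleTester:
    def __init__(self, parent):
        self.parent = parent
        self.batch_size = 14              # fixed value


# NewStyleTester(parent, batch_size=2) now raises:
# TypeError: __init__() got an unexpected keyword argument 'batch_size'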
Tests for DistilBERT (DistilBertModelTester / DistilBertModelTest)

@@ -34,27 +34,6 @@ if is_torch_available():
          DistilBertForSequenceClassification,
      )

Here only the class order changes: DistilBertModelTester(object) keeps its full keyword-argument __init__, while the @require_torch DistilBertModelTest class that used to precede it is removed from above the tester ...

@@ -245,8 +224,29 @@ class DistilBertModelTest(ModelTesterMixin, unittest.TestCase):
... and re-declared after it, otherwise unchanged:

     all_model_classes = (
         (
             DistilBertModel,
             DistilBertForMaskedLM,
             DistilBertForMultipleChoice,
             DistilBertForQuestionAnswering,
             DistilBertForSequenceClassification,
             DistilBertForTokenClassification,
         )
         if is_torch_available()
         else None
     )
     test_pruning = True
     test_torchscript = True
     test_resize_embeddings = True
     test_head_masking = True

Note that the fallback here is None, where the other test files use the empty tuple. setUp now reads:

-        self.model_tester = DistilBertModelTest.DistilBertModelTester(self)
+        self.model_tester = DistilBertModelTester(self)
         self.config_tester = ConfigTester(self, config_class=DistilBertConfig, dim=37)
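On that None fallback: it is only a latent difference, since the fallback is selected exactly when torch is unavailable and the torch tests are skipped, but iterating over None would fail where iterating over the empty tuple used elsewhere would not. A tiny illustration:

for model_class in ():        # the usual fallback: zero iterations, no error
    pass

for model_class in None:      # the DistilBert fallback, if it were ever iterated
    pass
# TypeError: 'NoneType' object is not iterable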
Tests for ELECTRA (ElectraModelTester / ElectraModelTest)

@@ -36,70 +36,32 @@ if is_torch_available():
      from transformers.modeling_electra import ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST

Same hoist: the nested class ElectraModelTester(object) becomes a top-level class ElectraModelTester with __init__(self, parent) and the former defaults inlined (batch_size=13, seq_length=7, is_training=True, use_input_mask=True, use_token_type_ids=True, use_labels=True, vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None).

@@ -216,16 +178,12 @@ class ElectraModelTest(ModelTesterMixin, unittest.TestCase):
In the ElectraForTokenClassification check, the wrapped calls are collapsed onto single lines:

-        loss, logits = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
-        )
+        loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)

-        self.parent.assertListEqual(
-            list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
-        )
+        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels])

@@ -323,8 +281,25 @@ class ElectraModelTest(ModelTesterMixin, unittest.TestCase):
The @require_torch class ElectraModelTest(ModelTesterMixin, unittest.TestCase), with all_model_classes = (ElectraModel, ElectraForPreTraining, ElectraForMaskedLM, ElectraForTokenClassification, ElectraForSequenceClassification, ElectraForQuestionAnswering) if is_torch_available() else (), is re-declared after the tester, and setUp now reads:

-        self.model_tester = ElectraModelTest.ElectraModelTester(self)
+        self.model_tester = ElectraModelTester(self)
         self.config_tester = ConfigTester(self, config_class=ElectraConfig, hidden_size=37)
Tests for Flaubert (FlaubertModelTester / FlaubertModelTest)

@@ -35,82 +35,38 @@ if is_torch_available():
      from transformers.modeling_flaubert import FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST

class FlaubertModelTester(object) keeps its name but its __init__ is reduced from the long keyword list (self, parent, batch_size=13, seq_length=7, is_training=True, use_input_lengths=True, use_token_type_ids=True, use_labels=True, gelu_activation=True, sinusoidal_embeddings=False, causal=False, asm=False, n_langs=2, vocab_size=99, n_special=0, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, summary_type="last", use_proj=True, scope=None) to (self, parent) with the values inlined. Two of the inlined values differ from the old defaults: the new body sets self.type_vocab_size = 12 (was 16) and self.use_proj = None (was True). The @require_torch FlaubertModelTest class, with all_model_classes = (FlaubertModel, FlaubertWithLMHeadModel, FlaubertForQuestionAnswering, FlaubertForQuestionAnsweringSimple, FlaubertForSequenceClassification) if is_torch_available() else (), is removed from above the tester here and re-declared after it below.

Three hunks (@@ -215,9 +171,7 @@, @@ -310,8 +264,7 @@ and @@ -339,9 +292,7 @@, all in class FlaubertModelTest) collapse wrapped assertions onto single lines:

-        self.parent.assertListEqual(
-            list(result["logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]
-        )
+        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.vocab_size])

         self.parent.assertListEqual(
-            list(result["end_top_index"].size()),
-            [self.batch_size, model.config.start_n_top * model.config.end_n_top],
+            list(result["end_top_index"].size()), [self.batch_size, model.config.start_n_top * model.config.end_n_top],
         )
         self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])

-        self.parent.assertListEqual(
-            list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size]
-        )
+        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size])

@@ -358,8 +309,24 @@ class FlaubertModelTest(ModelTesterMixin, unittest.TestCase):
The @require_torch FlaubertModelTest class is re-declared after the tester, and setUp now reads:

-        self.model_tester = FlaubertModelTest.FlaubertModelTester(self)
+        self.model_tester = FlaubertModelTester(self)
         self.config_tester = ConfigTester(self, config_class=FlaubertConfig, emb_dim=37)
Tests for GPT-2 (GPT2ModelTester / GPT2ModelTest)

@@ -34,15 +34,7 @@ if is_torch_available():
      )

The nested class GPT2ModelTester(object) becomes a top-level class GPT2ModelTester. Its __init__ keeps the full (self, parent, ..., scope=None) keyword-argument signature, but the body now assigns literals instead of the parameters.

@@ -70,28 +62,28 @@ class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
Each "self.x = x" assignment becomes a literal: batch_size = 14, seq_length = 7, is_training = True, use_token_type_ids = True, use_input_mask = True, use_labels = True, use_mc_token_ids = True, vocab_size = 99, hidden_size = 32, num_hidden_layers = 5, num_attention_heads = 4, intermediate_size = 37, hidden_act = "gelu", hidden_dropout_prob = 0.1, max_position_embeddings = 512, type_vocab_size = 16, type_sequence_label_size = 2, initializer_range = 0.02, num_labels = 3, num_choices = 4, scope = None. One replacement stands out:

-        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.attention_probs_dropout_prob = 0, 1

(the old default was 0.1). self.bos_token_id and self.eos_token_id keep their vocab_size - 1 expressions.

@@ -277,8 +269,7 @@ class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
A wrapped assertion is collapsed:

         self.parent.assertListEqual(
-            list(result["lm_logits"].size()),
-            [self.batch_size, self.num_choices, self.seq_length, self.vocab_size],
+            list(result["lm_logits"].size()), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size],
         )
         self.parent.assertListEqual(list(result["mc_logits"].size()), [self.batch_size, self.num_choices])

@@ -305,8 +296,17 @@ class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
The @require_torch class GPT2ModelTest(ModelTesterMixin, unittest.TestCase) is re-declared after the tester with

     all_model_classes = (GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel) if is_torch_available() else ()
     all_generative_model_classes = (
         (GPT2LMHeadModel,) if is_torch_available() else ()
     )  # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly

and setUp now reads:

-        self.model_tester = GPT2ModelTest.GPT2ModelTester(self)
+        self.model_tester = GPT2ModelTester(self)
         self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37)
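The attention_probs_dropout_prob replacement in the GPT-2 hunk deserves a second look: in Python a bare comma on the right-hand side builds a tuple, so the new line assigns (0, 1) where the old default was the float 0.1. Whether that affects the tests depends on how the value is consumed downstream, which this diff does not show:

>>> attention_probs_dropout_prob = 0, 1
>>> attention_probs_dropout_prob
(0, 1)
>>> type(attention_probs_dropout_prob)
<class 'tuple'>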
Tests for Longformer (LongformerModelTester)

@@ -36,56 +36,33 @@ if is_torch_available():
      )

LongformerModelTester was already a top-level class; here class LongformerModelTester(object) becomes class LongformerModelTester and its __init__ is reduced from the keyword list (self, parent, batch_size=13, seq_length=7, is_training=True, use_input_mask=True, use_token_type_ids=True, use_labels=True, vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None, attention_window=4) to (self, parent) with those values assigned directly. The comment noting that ModelTesterMixin.test_attention_outputs expects attention tensors of size [num_attention_heads, encoder_seq_length, encoder_key_length] while LongformerSelfAttention differs is unchanged.
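Dropping the explicit (object) base, as done for LongformerModelTester here and for the other hoisted testers, is purely cosmetic in Python 3: every class is new-style and inherits from object either way.

class WithExplicitBase(object):
    pass

class WithoutExplicitBase:
    pass

# Both classes have the same base; the two spellings are equivalent in Python 3.
assert WithExplicitBase.__bases__ == WithoutExplicitBase.__bases__ == (object,)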
Tests for OpenAI GPT (OpenAIGPTModelTester / OpenAIGPTModelTest)

@@ -34,62 +34,31 @@ if is_torch_available():
      )

Same hoist: the nested class OpenAIGPTModelTester(object) becomes a top-level class OpenAIGPTModelTester with __init__(self, parent) and the former defaults inlined (batch_size=13, seq_length=7, is_training=True, use_token_type_ids=True, use_labels=True, vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None).

@@ -197,8 +166,19 @@ class OpenAIGPTModelTest(ModelTesterMixin, unittest.TestCase):
The @require_torch class OpenAIGPTModelTest(ModelTesterMixin, unittest.TestCase) is re-declared after the tester with

     all_model_classes = (
         (OpenAIGPTModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel) if is_torch_available() else ()
     )
     all_generative_model_classes = (
         (OpenAIGPTLMHeadModel,) if is_torch_available() else ()
     )  # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly

and setUp now reads:

-        self.model_tester = OpenAIGPTModelTest.OpenAIGPTModelTester(self)
+        self.model_tester = OpenAIGPTModelTester(self)
         self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37)
Tests for RoBERTa (RobertaModelTester / RobertaModelTest)

@@ -39,70 +39,32 @@ if is_torch_available():
      from transformers.modeling_utils import create_position_ids_from_input_ids

Same hoist: the nested class RobertaModelTester(object) becomes a top-level class RobertaModelTester with __init__(self, parent) and the former defaults inlined (batch_size=13, seq_length=7, is_training=True, use_input_mask=True, use_token_type_ids=True, use_labels=True, vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None).

@@ -186,16 +148,12 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
In the RobertaForTokenClassification check, the wrapped calls are collapsed onto single lines:

-        loss, logits = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
-        )
+        loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)

-        self.parent.assertListEqual(
-            list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
-        )
+        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels])

@@ -257,8 +215,25 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
The @require_torch class RobertaModelTest(ModelTesterMixin, unittest.TestCase), with all_model_classes = (RobertaForMaskedLM, RobertaModel, RobertaForSequenceClassification, RobertaForTokenClassification, RobertaForMultipleChoice, RobertaForQuestionAnswering) if is_torch_available() else (), is re-declared after the tester, and setUp now reads:

-        self.model_tester = RobertaModelTest.RobertaModelTester(self)
+        self.model_tester = RobertaModelTester(self)
         self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37)
Tests for T5 (T5ModelTester / T5ModelTest)

@@ -30,60 +30,28 @@ if is_torch_available():
      from transformers.tokenization_t5 import T5Tokenizer

Same hoist: the nested class T5ModelTester(object) becomes a top-level class T5ModelTester with def __init__(self, parent): and the former defaults inlined (batch_size=13, encoder_seq_length=7, decoder_seq_length=9, is_training=True, use_attention_mask=True, use_labels=True, vocab_size=99, n_positions=14, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, d_ff=37, relative_attention_num_buckets=8, dropout_rate=0.1, initializer_factor=0.002, eos_token_id=1, pad_token_id=0, decoder_start_token_id=0, scope=None).

@@ -173,9 +141,7 @@ and @@ -278,9 +244,7 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
Two wrapped model calls are collapsed onto single lines:

-        decoder_output, decoder_past, encoder_output = model(
-            input_ids=input_ids, decoder_input_ids=decoder_input_ids
-        )
+        decoder_output, decoder_past, encoder_output = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

-        output_from_past = model(
-            next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask
-        )[0]
+        output_from_past = model(next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask)[0]

@@ -316,14 +280,7 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
In prepare_config_and_inputs_for_common, the multi-line tuple unpacking is collapsed:

-        (
-            config,
-            input_ids,
-            decoder_input_ids,
-            attention_mask,
-            decoder_attention_mask,
-            lm_labels,
-        ) = config_and_inputs
+        (config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,) = config_and_inputs

@@ -334,8 +291,19 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
The @require_torch class T5ModelTest(ModelTesterMixin, unittest.TestCase) is re-declared after the tester with

     all_model_classes = (T5Model, T5ForConditionalGeneration) if is_torch_available() else ()
     all_generative_model_classes = (T5ForConditionalGeneration,) if is_torch_available() else ()
     test_pruning = False
     test_torchscript = False
     test_resize_embeddings = False
     is_encoder_decoder = True

and setUp now reads:

-        self.model_tester = T5ModelTest.T5ModelTester(self)
+        self.model_tester = T5ModelTester(self)
         self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)
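The collapsed tuple unpacking in T5's prepare_config_and_inputs_for_common behaves identically to the multi-line form: parenthesised targets unpack the same way regardless of layout. A quick check with placeholder values (the real tuple comes from prepare_config_and_inputs):

# Placeholder stand-ins for the six values returned by prepare_config_and_inputs.
config_and_inputs = ("config", "input_ids", "decoder_input_ids", "attention_mask", "decoder_attention_mask", "lm_labels")

(
    config,
    input_ids,
    decoder_input_ids,
    attention_mask,
    decoder_attention_mask,
    lm_labels,
) = config_and_inputs

(config2, input_ids2, decoder_input_ids2, attention_mask2, decoder_attention_mask2, lm_labels2,) = config_and_inputs

assert (config, input_ids, lm_labels) == (config2, input_ids2, lm_labels2)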
TensorFlow tests for ALBERT (TFAlbertModelTester / TFAlbertModelTest)

@@ -34,22 +34,7 @@ if is_tf_available():
      )

The nested class TFAlbertModelTester(object) becomes a top-level class TFAlbertModelTester. Its __init__ keeps the full (self, parent, ..., scope=None) keyword-argument signature, but the body assigns literals instead of the parameters.

@@ -77,28 +62,28 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
batch_size = 13, seq_length = 7, is_training = True, use_input_mask = True, use_token_type_ids = True, use_labels = True, vocab_size = 99, embedding_size = 16, hidden_size = 32, num_hidden_layers = 5, num_attention_heads = 4, intermediate_size = 37, hidden_act = "gelu", hidden_dropout_prob = 0.1, attention_probs_dropout_prob = 0.1, max_position_embeddings = 512, type_vocab_size = 16, type_sequence_label_size = 2, initializer_range = 0.02, num_labels = 3, num_choices = 4, scope = None.

@@ -228,8 +213,24 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
The @require_tf class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase), with all_model_classes = (TFAlbertModel, TFAlbertForPreTraining, TFAlbertForMaskedLM, TFAlbertForSequenceClassification, TFAlbertForQuestionAnswering) if is_tf_available() else (), is re-declared after the tester, and setUp now reads:

-        self.model_tester = TFAlbertModelTest.TFAlbertModelTester(self)
+        self.model_tester = TFAlbertModelTester(self)
         self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37)
TensorFlow tests for BERT (TFBertModelTester / TFBertModelTest)

@@ -37,25 +37,7 @@ if is_tf_available():
      )

The nested class TFBertModelTester(object) becomes a top-level class TFBertModelTester. Its __init__ keeps its keyword-argument signature, but the body assigns literals.

@@ -82,27 +64,27 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
batch_size = 13, seq_length = 7, is_training = True, use_input_mask = True, use_token_type_ids = True, use_labels = True, vocab_size = 99, hidden_size = 32, num_hidden_layers = 5, num_attention_heads = 4, intermediate_size = 37, hidden_act = "gelu", hidden_dropout_prob = 0.1, attention_probs_dropout_prob = 0.1, max_position_embeddings = 512, type_vocab_size = 16, type_sequence_label_size = 2, initializer_range = 0.02, num_labels = 3, num_choices = 4, scope = None.

@@ -240,9 +222,7 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
A wrapped assertion just before create_and_check_bert_for_question_answering is collapsed:

-        self.parent.assertListEqual(
-            list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]
-        )
+        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])

@@ -271,8 +251,27 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
The @require_tf class TFBertModelTest(TFModelTesterMixin, unittest.TestCase), with all_model_classes = (TFBertModel, TFBertForMaskedLM, TFBertForNextSentencePrediction, TFBertForPreTraining, TFBertForQuestionAnswering, TFBertForSequenceClassification, TFBertForTokenClassification, TFBertForMultipleChoice) if is_tf_available() else (), is re-declared after the tester, and setUp now reads:

-        self.model_tester = TFBertModelTest.TFBertModelTester(self)
+        self.model_tester = TFBertModelTester(self)
         self.config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)
@ -28,62 +28,33 @@ if is_tf_available():
|
||||||
from transformers.modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel, TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST
|
from transformers.modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel, TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST
|
||||||
|
|
||||||
|
|
||||||
@require_tf
|
|
||||||
class TFCTRLModelTest(TFModelTesterMixin, unittest.TestCase):
|
|
||||||
|
|
||||||
all_model_classes = (TFCTRLModel, TFCTRLLMHeadModel) if is_tf_available() else ()
|
|
||||||
all_generative_model_classes = (TFCTRLLMHeadModel,) if is_tf_available() else ()
|
|
||||||
|
|
||||||
class TFCTRLModelTester(object):
|
class TFCTRLModelTester(object):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, parent,
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
is_training=True,
|
|
||||||
use_token_type_ids=True,
|
|
||||||
use_input_mask=True,
|
|
||||||
use_labels=True,
|
|
||||||
use_mc_token_ids=True,
|
|
||||||
vocab_size=99,
|
|
||||||
hidden_size=32,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
num_attention_heads=4,
|
|
||||||
intermediate_size=37,
|
|
||||||
hidden_act="gelu",
|
|
||||||
hidden_dropout_prob=0.1,
|
|
||||||
attention_probs_dropout_prob=0.1,
|
|
||||||
max_position_embeddings=512,
|
|
||||||
type_vocab_size=16,
|
|
||||||
type_sequence_label_size=2,
|
|
||||||
initializer_range=0.02,
|
|
||||||
num_labels=3,
|
|
||||||
num_choices=4,
|
|
||||||
scope=None,
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = 13
|
||||||
self.seq_length = seq_length
|
self.seq_length = 7
|
||||||
self.is_training = is_training
|
self.is_training = True
|
||||||
self.use_token_type_ids = use_token_type_ids
|
self.use_token_type_ids = True
|
||||||
self.use_input_mask = use_input_mask
|
self.use_input_mask = True
|
||||||
self.use_labels = use_labels
|
self.use_labels = True
|
||||||
self.use_mc_token_ids = use_mc_token_ids
|
self.use_mc_token_ids = True
|
||||||
self.vocab_size = vocab_size
|
self.vocab_size = 99
|
||||||
self.hidden_size = hidden_size
|
self.hidden_size = 32
|
||||||
self.num_hidden_layers = num_hidden_layers
|
self.num_hidden_layers = 5
|
||||||
self.num_attention_heads = num_attention_heads
|
self.num_attention_heads = 4
|
||||||
self.intermediate_size = intermediate_size
|
self.intermediate_size = 37
|
||||||
self.hidden_act = hidden_act
|
self.hidden_act = "gelu"
|
||||||
self.hidden_dropout_prob = hidden_dropout_prob
|
self.hidden_dropout_prob = 0.1
|
||||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
self.attention_probs_dropout_prob = 0.1
|
||||||
self.max_position_embeddings = max_position_embeddings
|
self.max_position_embeddings = 512
|
||||||
self.type_vocab_size = type_vocab_size
|
self.type_vocab_size = 16
|
||||||
self.type_sequence_label_size = type_sequence_label_size
|
self.type_sequence_label_size = 2
|
||||||
self.initializer_range = initializer_range
|
self.initializer_range = 0.02
|
||||||
self.num_labels = num_labels
|
self.num_labels = 3
|
||||||
self.num_choices = num_choices
|
self.num_choices = 4
|
||||||
self.scope = scope
|
self.scope = None
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
@ -183,8 +154,15 @@ class TFCTRLModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
|
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
|
||||||
return config, inputs_dict
|
return config, inputs_dict
|
||||||
|
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
class TFCTRLModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
|
|
||||||
|
all_model_classes = (TFCTRLModel, TFCTRLLMHeadModel) if is_tf_available() else ()
|
||||||
|
all_generative_model_classes = (TFCTRLLMHeadModel,) if is_tf_available() else ()
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.model_tester = TFCTRLModelTest.TFCTRLModelTester(self)
|
self.model_tester = TFCTRLModelTester(self)
|
||||||
self.config_tester = ConfigTester(self, config_class=CTRLConfig, n_embd=37)
|
self.config_tester = ConfigTester(self, config_class=CTRLConfig, n_embd=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
|
|
|
@ -32,72 +32,32 @@ if is_tf_available():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@require_tf
|
class TFDistilBertModelTester:
|
||||||
class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
|
||||||
|
|
||||||
all_model_classes = (
|
|
||||||
(
|
|
||||||
TFDistilBertModel,
|
|
||||||
TFDistilBertForMaskedLM,
|
|
||||||
TFDistilBertForQuestionAnswering,
|
|
||||||
TFDistilBertForSequenceClassification,
|
|
||||||
)
|
|
||||||
if is_tf_available()
|
|
||||||
else None
|
|
||||||
)
|
|
||||||
test_pruning = True
|
|
||||||
test_torchscript = True
|
|
||||||
test_resize_embeddings = True
|
|
||||||
test_head_masking = True
|
|
||||||
|
|
||||||
class TFDistilBertModelTester(object):
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, parent,
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
is_training=True,
|
|
||||||
use_input_mask=True,
|
|
||||||
use_token_type_ids=False,
|
|
||||||
use_labels=True,
|
|
||||||
vocab_size=99,
|
|
||||||
hidden_size=32,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
num_attention_heads=4,
|
|
||||||
intermediate_size=37,
|
|
||||||
hidden_act="gelu",
|
|
||||||
hidden_dropout_prob=0.1,
|
|
||||||
attention_probs_dropout_prob=0.1,
|
|
||||||
max_position_embeddings=512,
|
|
||||||
type_vocab_size=16,
|
|
||||||
type_sequence_label_size=2,
|
|
||||||
initializer_range=0.02,
|
|
||||||
num_labels=3,
|
|
||||||
num_choices=4,
|
|
||||||
scope=None,
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = 13
|
||||||
self.seq_length = seq_length
|
self.seq_length = 7
|
||||||
self.is_training = is_training
|
self.is_training = True
|
||||||
self.use_input_mask = use_input_mask
|
self.use_input_mask = True
|
||||||
self.use_token_type_ids = use_token_type_ids
|
self.use_token_type_ids = False
|
||||||
self.use_labels = use_labels
|
self.use_labels = True
|
||||||
self.vocab_size = vocab_size
|
self.vocab_size = 99
|
||||||
self.hidden_size = hidden_size
|
self.hidden_size = 32
|
||||||
self.num_hidden_layers = num_hidden_layers
|
self.num_hidden_layers = 5
|
||||||
self.num_attention_heads = num_attention_heads
|
self.num_attention_heads = 4
|
||||||
self.intermediate_size = intermediate_size
|
self.intermediate_size = 37
|
||||||
self.hidden_act = hidden_act
|
self.hidden_act = "gelu"
|
||||||
self.hidden_dropout_prob = hidden_dropout_prob
|
self.hidden_dropout_prob = 0.1
|
||||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
self.attention_probs_dropout_prob = 0.1
|
||||||
self.max_position_embeddings = max_position_embeddings
|
self.max_position_embeddings = 512
|
||||||
self.type_vocab_size = type_vocab_size
|
self.type_vocab_size = 16
|
||||||
self.type_sequence_label_size = type_sequence_label_size
|
self.type_sequence_label_size = 2
|
||||||
self.initializer_range = initializer_range
|
self.initializer_range = 0.02
|
||||||
self.num_labels = num_labels
|
self.num_labels = 3
|
||||||
self.num_choices = num_choices
|
self.num_choices = 4
|
||||||
self.scope = scope
|
self.scope = None
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
@ -193,8 +153,27 @@ class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
|
inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
|
||||||
return config, inputs_dict
|
return config, inputs_dict
|
||||||
|
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
|
|
||||||
|
all_model_classes = (
|
||||||
|
(
|
||||||
|
TFDistilBertModel,
|
||||||
|
TFDistilBertForMaskedLM,
|
||||||
|
TFDistilBertForQuestionAnswering,
|
||||||
|
TFDistilBertForSequenceClassification,
|
||||||
|
)
|
||||||
|
if is_tf_available()
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
test_pruning = True
|
||||||
|
test_torchscript = True
|
||||||
|
test_resize_embeddings = True
|
||||||
|
test_head_masking = True
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.model_tester = TFDistilBertModelTest.TFDistilBertModelTester(self)
|
self.model_tester = TFDistilBertModelTester(self)
|
||||||
self.config_tester = ConfigTester(self, config_class=DistilBertConfig, dim=37)
|
self.config_tester = ConfigTester(self, config_class=DistilBertConfig, dim=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
|
|
|
@ -32,63 +32,32 @@ if is_tf_available():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@require_tf
|
class TFElectraModelTester:
|
||||||
class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
|
|
||||||
|
|
||||||
all_model_classes = (
|
|
||||||
(TFElectraModel, TFElectraForMaskedLM, TFElectraForPreTraining, TFElectraForTokenClassification,)
|
|
||||||
if is_tf_available()
|
|
||||||
else ()
|
|
||||||
)
|
|
||||||
|
|
||||||
class TFElectraModelTester(object):
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, parent,
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
is_training=True,
|
|
||||||
use_input_mask=True,
|
|
||||||
use_token_type_ids=True,
|
|
||||||
use_labels=True,
|
|
||||||
vocab_size=99,
|
|
||||||
hidden_size=32,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
num_attention_heads=4,
|
|
||||||
intermediate_size=37,
|
|
||||||
hidden_act="gelu",
|
|
||||||
hidden_dropout_prob=0.1,
|
|
||||||
attention_probs_dropout_prob=0.1,
|
|
||||||
max_position_embeddings=512,
|
|
||||||
type_vocab_size=16,
|
|
||||||
type_sequence_label_size=2,
|
|
||||||
initializer_range=0.02,
|
|
||||||
num_labels=3,
|
|
||||||
num_choices=4,
|
|
||||||
scope=None,
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = 13
|
||||||
self.seq_length = seq_length
|
self.seq_length = 7
|
||||||
self.is_training = is_training
|
self.is_training = True
|
||||||
self.use_input_mask = use_input_mask
|
self.use_input_mask = True
|
||||||
self.use_token_type_ids = use_token_type_ids
|
self.use_token_type_ids = True
|
||||||
self.use_labels = use_labels
|
self.use_labels = True
|
||||||
self.vocab_size = vocab_size
|
self.vocab_size = 99
|
||||||
self.hidden_size = hidden_size
|
self.hidden_size = 32
|
||||||
self.num_hidden_layers = num_hidden_layers
|
self.num_hidden_layers = 5
|
||||||
self.num_attention_heads = num_attention_heads
|
self.num_attention_heads = 4
|
||||||
self.intermediate_size = intermediate_size
|
self.intermediate_size = 37
|
||||||
self.hidden_act = hidden_act
|
self.hidden_act = "gelu"
|
||||||
self.hidden_dropout_prob = hidden_dropout_prob
|
self.hidden_dropout_prob = 0.1
|
||||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
self.attention_probs_dropout_prob = 0.1
|
||||||
self.max_position_embeddings = max_position_embeddings
|
self.max_position_embeddings = 512
|
||||||
self.type_vocab_size = type_vocab_size
|
self.type_vocab_size = 16
|
||||||
self.type_sequence_label_size = type_sequence_label_size
|
self.type_sequence_label_size = 2
|
||||||
self.initializer_range = initializer_range
|
self.initializer_range = 0.02
|
||||||
self.num_labels = num_labels
|
self.num_labels = 3
|
||||||
self.num_choices = num_choices
|
self.num_choices = 4
|
||||||
self.scope = scope
|
self.scope = None
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
@ -178,9 +147,7 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
result = {
|
result = {
|
||||||
"logits": logits.numpy(),
|
"logits": logits.numpy(),
|
||||||
}
|
}
|
||||||
self.parent.assertListEqual(
|
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
|
||||||
list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]
|
|
||||||
)
|
|
||||||
|
|
||||||
def prepare_config_and_inputs_for_common(self):
|
def prepare_config_and_inputs_for_common(self):
|
||||||
config_and_inputs = self.prepare_config_and_inputs()
|
config_and_inputs = self.prepare_config_and_inputs()
|
||||||
|
@ -196,8 +163,18 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
|
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
|
||||||
return config, inputs_dict
|
return config, inputs_dict
|
||||||
|
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
|
|
||||||
|
all_model_classes = (
|
||||||
|
(TFElectraModel, TFElectraForMaskedLM, TFElectraForPreTraining, TFElectraForTokenClassification,)
|
||||||
|
if is_tf_available()
|
||||||
|
else ()
|
||||||
|
)
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.model_tester = TFElectraModelTest.TFElectraModelTester(self)
|
self.model_tester = TFElectraModelTester(self)
|
||||||
self.config_tester = ConfigTester(self, config_class=ElectraConfig, hidden_size=37)
|
self.config_tester = ConfigTester(self, config_class=ElectraConfig, hidden_size=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
|
|
|
@ -34,64 +34,35 @@ if is_tf_available():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@require_tf
|
class TFGPT2ModelTester:
|
||||||
class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):
|
|
||||||
|
|
||||||
all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel) if is_tf_available() else ()
|
|
||||||
all_generative_model_classes = (TFGPT2LMHeadModel,) if is_tf_available() else ()
|
|
||||||
|
|
||||||
class TFGPT2ModelTester(object):
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, parent,
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
is_training=True,
|
|
||||||
use_token_type_ids=True,
|
|
||||||
use_input_mask=True,
|
|
||||||
use_labels=True,
|
|
||||||
use_mc_token_ids=True,
|
|
||||||
vocab_size=99,
|
|
||||||
hidden_size=32,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
num_attention_heads=4,
|
|
||||||
intermediate_size=37,
|
|
||||||
hidden_act="gelu",
|
|
||||||
hidden_dropout_prob=0.1,
|
|
||||||
attention_probs_dropout_prob=0.1,
|
|
||||||
max_position_embeddings=512,
|
|
||||||
type_vocab_size=16,
|
|
||||||
type_sequence_label_size=2,
|
|
||||||
initializer_range=0.02,
|
|
||||||
num_labels=3,
|
|
||||||
num_choices=4,
|
|
||||||
scope=None,
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = 13
|
||||||
self.seq_length = seq_length
|
self.seq_length = 7
|
||||||
self.is_training = is_training
|
self.is_training = True
|
||||||
self.use_token_type_ids = use_token_type_ids
|
self.use_token_type_ids = True
|
||||||
self.use_input_mask = use_input_mask
|
self.use_input_mask = True
|
||||||
self.use_labels = use_labels
|
self.use_labels = True
|
||||||
self.use_mc_token_ids = use_mc_token_ids
|
self.use_mc_token_ids = True
|
||||||
self.vocab_size = vocab_size
|
self.vocab_size = 99
|
||||||
self.hidden_size = hidden_size
|
self.hidden_size = 32
|
||||||
self.num_hidden_layers = num_hidden_layers
|
self.num_hidden_layers = 5
|
||||||
self.num_attention_heads = num_attention_heads
|
self.num_attention_heads = 4
|
||||||
self.intermediate_size = intermediate_size
|
self.intermediate_size = 37
|
||||||
self.hidden_act = hidden_act
|
self.hidden_act = "gelu"
|
||||||
self.hidden_dropout_prob = hidden_dropout_prob
|
self.hidden_dropout_prob = 0.1
|
||||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
self.attention_probs_dropout_prob = 0.1
|
||||||
self.max_position_embeddings = max_position_embeddings
|
self.max_position_embeddings = 512
|
||||||
self.type_vocab_size = type_vocab_size
|
self.type_vocab_size = 16
|
||||||
self.type_sequence_label_size = type_sequence_label_size
|
self.type_sequence_label_size = 2
|
||||||
self.initializer_range = initializer_range
|
self.initializer_range = 0.02
|
||||||
self.num_labels = num_labels
|
self.num_labels = 3
|
||||||
self.num_choices = num_choices
|
self.num_choices = 4
|
||||||
self.scope = scope
|
self.scope = None
|
||||||
self.bos_token_id = vocab_size - 1
|
self.bos_token_id = self.vocab_size - 1
|
||||||
self.eos_token_id = vocab_size - 1
|
self.eos_token_id = self.vocab_size - 1
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
@ -294,8 +265,15 @@ class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
}
|
}
|
||||||
return config, inputs_dict
|
return config, inputs_dict
|
||||||
|
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
|
|
||||||
|
all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel) if is_tf_available() else ()
|
||||||
|
all_generative_model_classes = (TFGPT2LMHeadModel,) if is_tf_available() else ()
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.model_tester = TFGPT2ModelTest.TFGPT2ModelTester(self)
|
self.model_tester = TFGPT2ModelTester(self)
|
||||||
self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37)
|
self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
|
|
|
@ -33,66 +33,33 @@ if is_tf_available():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@require_tf
|
class TFOpenAIGPTModelTester:
|
||||||
class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):
|
|
||||||
|
|
||||||
all_model_classes = (
|
|
||||||
(TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel) if is_tf_available() else ()
|
|
||||||
)
|
|
||||||
all_generative_model_classes = (
|
|
||||||
(TFOpenAIGPTLMHeadModel,) if is_tf_available() else ()
|
|
||||||
) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly
|
|
||||||
|
|
||||||
class TFOpenAIGPTModelTester(object):
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, parent,
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
is_training=True,
|
|
||||||
use_token_type_ids=True,
|
|
||||||
use_input_mask=True,
|
|
||||||
use_labels=True,
|
|
||||||
use_mc_token_ids=True,
|
|
||||||
vocab_size=99,
|
|
||||||
hidden_size=32,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
num_attention_heads=4,
|
|
||||||
intermediate_size=37,
|
|
||||||
hidden_act="gelu",
|
|
||||||
hidden_dropout_prob=0.1,
|
|
||||||
attention_probs_dropout_prob=0.1,
|
|
||||||
max_position_embeddings=512,
|
|
||||||
type_vocab_size=16,
|
|
||||||
type_sequence_label_size=2,
|
|
||||||
initializer_range=0.02,
|
|
||||||
num_labels=3,
|
|
||||||
num_choices=4,
|
|
||||||
scope=None,
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = 13
|
||||||
self.seq_length = seq_length
|
self.seq_length = 7
|
||||||
self.is_training = is_training
|
self.is_training = True
|
||||||
self.use_token_type_ids = use_token_type_ids
|
self.use_token_type_ids = True
|
||||||
self.use_input_mask = use_input_mask
|
self.use_input_mask = True
|
||||||
self.use_labels = use_labels
|
self.use_labels = True
|
||||||
self.use_mc_token_ids = use_mc_token_ids
|
self.use_mc_token_ids = True
|
||||||
self.vocab_size = vocab_size
|
self.vocab_size = 99
|
||||||
self.hidden_size = hidden_size
|
self.hidden_size = 32
|
||||||
self.num_hidden_layers = num_hidden_layers
|
self.num_hidden_layers = 5
|
||||||
self.num_attention_heads = num_attention_heads
|
self.num_attention_heads = 4
|
||||||
self.intermediate_size = intermediate_size
|
self.intermediate_size = 37
|
||||||
self.hidden_act = hidden_act
|
self.hidden_act = "gelu"
|
||||||
self.hidden_dropout_prob = hidden_dropout_prob
|
self.hidden_dropout_prob = 0.1
|
||||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
self.attention_probs_dropout_prob = 0.1
|
||||||
self.max_position_embeddings = max_position_embeddings
|
self.max_position_embeddings = 512
|
||||||
self.type_vocab_size = type_vocab_size
|
self.type_vocab_size = 16
|
||||||
self.type_sequence_label_size = type_sequence_label_size
|
self.type_sequence_label_size = 2
|
||||||
self.initializer_range = initializer_range
|
self.initializer_range = 0.02
|
||||||
self.num_labels = num_labels
|
self.num_labels = 3
|
||||||
self.num_choices = num_choices
|
self.num_choices = 4
|
||||||
self.scope = scope
|
self.scope = None
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
@ -214,8 +181,19 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
|
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
|
||||||
return config, inputs_dict
|
return config, inputs_dict
|
||||||
|
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
|
|
||||||
|
all_model_classes = (
|
||||||
|
(TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel) if is_tf_available() else ()
|
||||||
|
)
|
||||||
|
all_generative_model_classes = (
|
||||||
|
(TFOpenAIGPTLMHeadModel,) if is_tf_available() else ()
|
||||||
|
) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.model_tester = TFOpenAIGPTModelTest.TFOpenAIGPTModelTester(self)
|
self.model_tester = TFOpenAIGPTModelTester(self)
|
||||||
self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37)
|
self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
|
|
|
@ -36,69 +36,32 @@ if is_tf_available():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@require_tf
|
class TFRobertaModelTester:
|
||||||
class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):
|
|
||||||
|
|
||||||
all_model_classes = (
|
|
||||||
(
|
|
||||||
TFRobertaModel,
|
|
||||||
TFRobertaForMaskedLM,
|
|
||||||
TFRobertaForSequenceClassification,
|
|
||||||
TFRobertaForTokenClassification,
|
|
||||||
TFRobertaForQuestionAnswering,
|
|
||||||
)
|
|
||||||
if is_tf_available()
|
|
||||||
else ()
|
|
||||||
)
|
|
||||||
|
|
||||||
class TFRobertaModelTester(object):
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, parent,
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
is_training=True,
|
|
||||||
use_input_mask=True,
|
|
||||||
use_token_type_ids=True,
|
|
||||||
use_labels=True,
|
|
||||||
vocab_size=99,
|
|
||||||
hidden_size=32,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
num_attention_heads=4,
|
|
||||||
intermediate_size=37,
|
|
||||||
hidden_act="gelu",
|
|
||||||
hidden_dropout_prob=0.1,
|
|
||||||
attention_probs_dropout_prob=0.1,
|
|
||||||
max_position_embeddings=512,
|
|
||||||
type_vocab_size=16,
|
|
||||||
type_sequence_label_size=2,
|
|
||||||
initializer_range=0.02,
|
|
||||||
num_labels=3,
|
|
||||||
num_choices=4,
|
|
||||||
scope=None,
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = 13
|
||||||
self.seq_length = seq_length
|
self.seq_length = 7
|
||||||
self.is_training = is_training
|
self.is_training = True
|
||||||
self.use_input_mask = use_input_mask
|
self.use_input_mask = True
|
||||||
self.use_token_type_ids = use_token_type_ids
|
self.use_token_type_ids = True
|
||||||
self.use_labels = use_labels
|
self.use_labels = True
|
||||||
self.vocab_size = vocab_size
|
self.vocab_size = 99
|
||||||
self.hidden_size = hidden_size
|
self.hidden_size = 32
|
||||||
self.num_hidden_layers = num_hidden_layers
|
self.num_hidden_layers = 5
|
||||||
self.num_attention_heads = num_attention_heads
|
self.num_attention_heads = 4
|
||||||
self.intermediate_size = intermediate_size
|
self.intermediate_size = 37
|
||||||
self.hidden_act = hidden_act
|
self.hidden_act = "gelu"
|
||||||
self.hidden_dropout_prob = hidden_dropout_prob
|
self.hidden_dropout_prob = 0.1
|
||||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
self.attention_probs_dropout_prob = 0.1
|
||||||
self.max_position_embeddings = max_position_embeddings
|
self.max_position_embeddings = 512
|
||||||
self.type_vocab_size = type_vocab_size
|
self.type_vocab_size = 16
|
||||||
self.type_sequence_label_size = type_sequence_label_size
|
self.type_sequence_label_size = 2
|
||||||
self.initializer_range = initializer_range
|
self.initializer_range = 0.02
|
||||||
self.num_labels = num_labels
|
self.num_labels = 3
|
||||||
self.num_choices = num_choices
|
self.num_choices = 4
|
||||||
self.scope = scope
|
self.scope = None
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
@ -176,9 +139,7 @@ class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
result = {
|
result = {
|
||||||
"logits": logits.numpy(),
|
"logits": logits.numpy(),
|
||||||
}
|
}
|
||||||
self.parent.assertListEqual(
|
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
|
||||||
list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]
|
|
||||||
)
|
|
||||||
|
|
||||||
def create_and_check_roberta_for_question_answering(
|
def create_and_check_roberta_for_question_answering(
|
||||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||||
|
@ -207,8 +168,24 @@ class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
|
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
|
||||||
return config, inputs_dict
|
return config, inputs_dict
|
||||||
|
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
|
|
||||||
|
all_model_classes = (
|
||||||
|
(
|
||||||
|
TFRobertaModel,
|
||||||
|
TFRobertaForMaskedLM,
|
||||||
|
TFRobertaForSequenceClassification,
|
||||||
|
TFRobertaForTokenClassification,
|
||||||
|
TFRobertaForQuestionAnswering,
|
||||||
|
)
|
||||||
|
if is_tf_available()
|
||||||
|
else ()
|
||||||
|
)
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.model_tester = TFRobertaModelTest.TFRobertaModelTester(self)
|
self.model_tester = TFRobertaModelTester(self)
|
||||||
self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37)
|
self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
|
|
|
@ -28,53 +28,28 @@ if is_tf_available():
|
||||||
from transformers import TFT5Model, TFT5ForConditionalGeneration, T5Tokenizer
|
from transformers import TFT5Model, TFT5ForConditionalGeneration, T5Tokenizer
|
||||||
|
|
||||||
|
|
||||||
@require_tf
|
class TFT5ModelTester:
|
||||||
class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
|
|
||||||
|
|
||||||
is_encoder_decoder = True
|
|
||||||
all_model_classes = (TFT5Model, TFT5ForConditionalGeneration) if is_tf_available() else ()
|
|
||||||
all_generative_model_classes = (TFT5ForConditionalGeneration,) if is_tf_available() else ()
|
|
||||||
|
|
||||||
class TFT5ModelTester(object):
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, parent,
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
is_training=True,
|
|
||||||
use_input_mask=True,
|
|
||||||
use_labels=True,
|
|
||||||
vocab_size=99,
|
|
||||||
n_positions=14,
|
|
||||||
hidden_size=32,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
num_attention_heads=4,
|
|
||||||
d_ff=37,
|
|
||||||
relative_attention_num_buckets=8,
|
|
||||||
dropout_rate=0.1,
|
|
||||||
initializer_factor=0.002,
|
|
||||||
eos_token_id=1,
|
|
||||||
pad_token_id=0,
|
|
||||||
scope=None,
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = 13
|
||||||
self.seq_length = seq_length
|
self.seq_length = 7
|
||||||
self.is_training = is_training
|
self.is_training = True
|
||||||
self.use_input_mask = use_input_mask
|
self.use_input_mask = True
|
||||||
self.use_labels = use_labels
|
self.use_labels = True
|
||||||
self.vocab_size = vocab_size
|
self.vocab_size = 99
|
||||||
self.n_positions = n_positions
|
self.n_positions = 14
|
||||||
self.hidden_size = hidden_size
|
self.hidden_size = 32
|
||||||
self.num_hidden_layers = num_hidden_layers
|
self.num_hidden_layers = 5
|
||||||
self.num_attention_heads = num_attention_heads
|
self.num_attention_heads = 4
|
||||||
self.d_ff = d_ff
|
self.d_ff = 37
|
||||||
self.relative_attention_num_buckets = relative_attention_num_buckets
|
self.relative_attention_num_buckets = 8
|
||||||
self.dropout_rate = dropout_rate
|
self.dropout_rate = 0.1
|
||||||
self.initializer_factor = initializer_factor
|
self.initializer_factor = 0.002
|
||||||
self.eos_token_id = eos_token_id
|
self.eos_token_id = 1
|
||||||
self.pad_token_id = pad_token_id
|
self.pad_token_id = 0
|
||||||
self.scope = scope
|
self.scope = None
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
@ -211,9 +186,7 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
|
|
||||||
# get two different outputs
|
# get two different outputs
|
||||||
output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
|
output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
|
||||||
output_from_past = model(
|
output_from_past = model(next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask)[0]
|
||||||
next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask
|
|
||||||
)[0]
|
|
||||||
|
|
||||||
# select random slice
|
# select random slice
|
||||||
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).numpy().item()
|
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).numpy().item()
|
||||||
|
@ -234,8 +207,16 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
}
|
}
|
||||||
return config, inputs_dict
|
return config, inputs_dict
|
||||||
|
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
|
|
||||||
|
is_encoder_decoder = True
|
||||||
|
all_model_classes = (TFT5Model, TFT5ForConditionalGeneration) if is_tf_available() else ()
|
||||||
|
all_generative_model_classes = (TFT5ForConditionalGeneration,) if is_tf_available() else ()
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.model_tester = TFT5ModelTest.TFT5ModelTester(self)
|
self.model_tester = TFT5ModelTester(self)
|
||||||
self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)
|
self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
|
|
|
@ -33,59 +33,30 @@ if is_tf_available():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@require_tf
|
class TFTransfoXLModelTester:
|
||||||
class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):
|
|
||||||
|
|
||||||
all_model_classes = (TFTransfoXLModel, TFTransfoXLLMHeadModel) if is_tf_available() else ()
|
|
||||||
all_generative_model_classes = () if is_tf_available() else ()
|
|
||||||
# TODO: add this test when TFTransfoXLLMHead has a linear output layer implemented
|
|
||||||
test_pruning = False
|
|
||||||
test_torchscript = False
|
|
||||||
test_resize_embeddings = False
|
|
||||||
|
|
||||||
class TFTransfoXLModelTester(object):
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, parent,
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
mem_len=30,
|
|
||||||
clamp_len=15,
|
|
||||||
is_training=True,
|
|
||||||
use_labels=True,
|
|
||||||
vocab_size=99,
|
|
||||||
cutoffs=[10, 50, 80],
|
|
||||||
hidden_size=32,
|
|
||||||
d_embed=32,
|
|
||||||
num_attention_heads=4,
|
|
||||||
d_head=8,
|
|
||||||
d_inner=128,
|
|
||||||
div_val=2,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
scope=None,
|
|
||||||
seed=1,
|
|
||||||
eos_token_id=0,
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = 13
|
||||||
self.seq_length = seq_length
|
self.seq_length = 7
|
||||||
self.mem_len = mem_len
|
self.mem_len = 30
|
||||||
self.key_length = seq_length + mem_len
|
self.key_length = self.seq_length + self.mem_len
|
||||||
self.clamp_len = clamp_len
|
self.clamp_len = 15
|
||||||
self.is_training = is_training
|
self.is_training = True
|
||||||
self.use_labels = use_labels
|
self.use_labels = True
|
||||||
self.vocab_size = vocab_size
|
self.vocab_size = 99
|
||||||
self.cutoffs = cutoffs
|
self.cutoffs = [10, 50, 80]
|
||||||
self.hidden_size = hidden_size
|
self.hidden_size = 32
|
||||||
self.d_embed = d_embed
|
self.d_embed = 32
|
||||||
self.num_attention_heads = num_attention_heads
|
self.num_attention_heads = 4
|
||||||
self.d_head = d_head
|
self.d_head = 8
|
||||||
self.d_inner = d_inner
|
self.d_inner = 128
|
||||||
self.div_val = div_val
|
self.div_val = 2
|
||||||
self.num_hidden_layers = num_hidden_layers
|
self.num_hidden_layers = 5
|
||||||
self.scope = scope
|
self.scope = None
|
||||||
self.seed = seed
|
self.seed = 1
|
||||||
self.eos_token_id = eos_token_id
|
self.eos_token_id = 0
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
@ -190,8 +161,19 @@ class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
inputs_dict = {"input_ids": input_ids_1}
|
inputs_dict = {"input_ids": input_ids_1}
|
||||||
return config, inputs_dict
|
return config, inputs_dict
|
||||||
|
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
|
|
||||||
|
all_model_classes = (TFTransfoXLModel, TFTransfoXLLMHeadModel) if is_tf_available() else ()
|
||||||
|
all_generative_model_classes = () if is_tf_available() else ()
|
||||||
|
# TODO: add this test when TFTransfoXLLMHead has a linear output layer implemented
|
||||||
|
test_pruning = False
|
||||||
|
test_torchscript = False
|
||||||
|
test_resize_embeddings = False
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.model_tester = TFTransfoXLModelTest.TFTransfoXLModelTester(self)
|
self.model_tester = TFTransfoXLModelTester(self)
|
||||||
self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)
|
self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
|
|
|
@ -35,81 +35,39 @@ if is_tf_available():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@require_tf
|
class TFXLMModelTester:
|
||||||
class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
|
|
||||||
|
|
||||||
all_model_classes = (
|
|
||||||
(TFXLMModel, TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple)
|
|
||||||
if is_tf_available()
|
|
||||||
else ()
|
|
||||||
)
|
|
||||||
all_generative_model_classes = (
|
|
||||||
(TFXLMWithLMHeadModel,) if is_tf_available() else ()
|
|
||||||
) # TODO (PVP): Check other models whether language generation is also applicable
|
|
||||||
|
|
||||||
class TFXLMModelTester(object):
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, parent,
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
is_training=True,
|
|
||||||
use_input_lengths=True,
|
|
||||||
use_token_type_ids=True,
|
|
||||||
use_labels=True,
|
|
||||||
gelu_activation=True,
|
|
||||||
sinusoidal_embeddings=False,
|
|
||||||
causal=False,
|
|
||||||
asm=False,
|
|
||||||
n_langs=2,
|
|
||||||
vocab_size=99,
|
|
||||||
n_special=0,
|
|
||||||
hidden_size=32,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
num_attention_heads=4,
|
|
||||||
hidden_dropout_prob=0.1,
|
|
||||||
attention_probs_dropout_prob=0.1,
|
|
||||||
max_position_embeddings=512,
|
|
||||||
type_vocab_size=16,
|
|
||||||
type_sequence_label_size=2,
|
|
||||||
initializer_range=0.02,
|
|
||||||
num_labels=3,
|
|
||||||
num_choices=4,
|
|
||||||
summary_type="last",
|
|
||||||
use_proj=True,
|
|
||||||
scope=None,
|
|
||||||
bos_token_id=0,
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = 13
|
||||||
self.seq_length = seq_length
|
self.seq_length = 7
|
||||||
self.is_training = is_training
|
self.is_training = True
|
||||||
self.use_input_lengths = use_input_lengths
|
self.use_input_lengths = True
|
||||||
self.use_token_type_ids = use_token_type_ids
|
self.use_token_type_ids = True
|
||||||
self.use_labels = use_labels
|
self.use_labels = True
|
||||||
self.gelu_activation = gelu_activation
|
self.gelu_activation = True
|
||||||
self.sinusoidal_embeddings = sinusoidal_embeddings
|
self.sinusoidal_embeddings = False
|
||||||
self.asm = asm
|
self.causal = False
|
||||||
self.n_langs = n_langs
|
self.asm = False
|
||||||
self.vocab_size = vocab_size
|
self.n_langs = 2
|
||||||
self.n_special = n_special
|
self.vocab_size = 99
|
||||||
self.summary_type = summary_type
|
self.n_special = 0
|
||||||
self.causal = causal
|
self.hidden_size = 32
|
||||||
self.use_proj = use_proj
|
self.num_hidden_layers = 5
|
||||||
self.hidden_size = hidden_size
|
self.num_attention_heads = 4
|
||||||
self.num_hidden_layers = num_hidden_layers
|
self.hidden_dropout_prob = 0.1
|
||||||
self.num_attention_heads = num_attention_heads
|
self.attention_probs_dropout_prob = 0.1
|
||||||
self.hidden_dropout_prob = hidden_dropout_prob
|
self.max_position_embeddings = 512
|
||||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
self.type_vocab_size = 16
|
||||||
self.max_position_embeddings = max_position_embeddings
|
self.type_sequence_label_size = 2
|
||||||
self.n_langs = n_langs
|
self.initializer_range = 0.02
|
||||||
self.type_sequence_label_size = type_sequence_label_size
|
self.num_labels = 3
|
||||||
self.initializer_range = initializer_range
|
self.num_choices = 4
|
||||||
self.summary_type = summary_type
|
self.summary_type = "last"
|
||||||
self.num_labels = num_labels
|
self.use_proj = True
|
||||||
self.num_choices = num_choices
|
self.scope = None
|
||||||
self.scope = scope
|
self.bos_token_id = 0
|
||||||
self.bos_token_id = bos_token_id
|
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
@ -211,9 +169,7 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
"logits": logits.numpy(),
|
"logits": logits.numpy(),
|
||||||
}
|
}
|
||||||
|
|
||||||
self.parent.assertListEqual(
|
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])
|
||||||
list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
|
||||||
)
|
|
||||||
|
|
||||||
def create_and_check_xlm_qa(
|
def create_and_check_xlm_qa(
|
||||||
self,
|
self,
|
||||||
|
@ -283,8 +239,21 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
}
|
}
|
||||||
return config, inputs_dict
|
return config, inputs_dict
|
||||||
|
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
|
|
||||||
|
all_model_classes = (
|
||||||
|
(TFXLMModel, TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple)
|
||||||
|
if is_tf_available()
|
||||||
|
else ()
|
||||||
|
)
|
||||||
|
all_generative_model_classes = (
|
||||||
|
(TFXLMWithLMHeadModel,) if is_tf_available() else ()
|
||||||
|
) # TODO (PVP): Check other models whether language generation is also applicable
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.model_tester = TFXLMModelTest.TFXLMModelTester(self)
|
self.model_tester = TFXLMModelTester(self)
|
||||||
self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)
|
self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
|
|
|
@ -37,78 +37,35 @@ if is_tf_available():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@require_tf
|
class TFXLNetModelTester:
|
||||||
class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
|
|
||||||
|
|
||||||
all_model_classes = (
|
|
||||||
(
|
|
||||||
TFXLNetModel,
|
|
||||||
TFXLNetLMHeadModel,
|
|
||||||
TFXLNetForSequenceClassification,
|
|
||||||
TFXLNetForTokenClassification,
|
|
||||||
TFXLNetForQuestionAnsweringSimple,
|
|
||||||
)
|
|
||||||
if is_tf_available()
|
|
||||||
else ()
|
|
||||||
)
|
|
||||||
all_generative_model_classes = (
|
|
||||||
(TFXLNetLMHeadModel,) if is_tf_available() else ()
|
|
||||||
) # TODO (PVP): Check other models whether language generation is also applicable
|
|
||||||
test_pruning = False
|
|
||||||
|
|
||||||
class TFXLNetModelTester(object):
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, parent,
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
mem_len=10,
|
|
||||||
clamp_len=-1,
|
|
||||||
reuse_len=15,
|
|
||||||
is_training=True,
|
|
||||||
use_labels=True,
|
|
||||||
vocab_size=99,
|
|
||||||
cutoffs=[10, 50, 80],
|
|
||||||
hidden_size=32,
|
|
||||||
num_attention_heads=4,
|
|
||||||
d_inner=128,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
type_sequence_label_size=2,
|
|
||||||
untie_r=True,
|
|
||||||
bi_data=False,
|
|
||||||
same_length=False,
|
|
||||||
initializer_range=0.05,
|
|
||||||
seed=1,
|
|
||||||
type_vocab_size=2,
|
|
||||||
bos_token_id=1,
|
|
||||||
eos_token_id=2,
|
|
||||||
pad_token_id=5,
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = 13
|
||||||
self.seq_length = seq_length
|
self.seq_length = 7
|
||||||
self.mem_len = mem_len
|
self.mem_len = 10
|
||||||
# self.key_len = seq_length + mem_len
|
# self.key_len = seq_length + mem_len
|
||||||
self.clamp_len = clamp_len
|
self.clamp_len = -1
|
||||||
self.reuse_len = reuse_len
|
self.reuse_len = 15
|
||||||
self.is_training = is_training
|
self.is_training = True
|
||||||
self.use_labels = use_labels
|
self.use_labels = True
|
||||||
self.vocab_size = vocab_size
|
self.vocab_size = 99
|
||||||
self.cutoffs = cutoffs
|
self.cutoffs = [10, 50, 80]
|
||||||
self.hidden_size = hidden_size
|
self.hidden_size = 32
|
||||||
self.num_attention_heads = num_attention_heads
|
self.num_attention_heads = 4
|
||||||
self.d_inner = d_inner
|
self.d_inner = 128
|
||||||
self.num_hidden_layers = num_hidden_layers
|
self.num_hidden_layers = 5
|
||||||
self.bi_data = bi_data
|
self.type_sequence_label_size = 2
|
||||||
self.untie_r = untie_r
|
self.untie_r = True
|
||||||
self.same_length = same_length
|
self.bi_data = False
|
||||||
self.initializer_range = initializer_range
|
self.same_length = False
|
||||||
self.seed = seed
|
self.initializer_range = 0.05
|
||||||
self.type_vocab_size = type_vocab_size
|
self.seed = 1
|
||||||
self.type_sequence_label_size = type_sequence_label_size
|
self.type_vocab_size = 2
|
||||||
self.bos_token_id = bos_token_id
|
self.bos_token_id = 1
|
||||||
self.pad_token_id = pad_token_id
|
self.eos_token_id = 2
|
||||||
self.eos_token_id = eos_token_id
|
self.pad_token_id = 5
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
@ -377,8 +334,28 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
inputs_dict = {"input_ids": input_ids_1}
|
inputs_dict = {"input_ids": input_ids_1}
|
||||||
return config, inputs_dict
|
return config, inputs_dict
|
||||||
|
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
|
|
||||||
|
all_model_classes = (
|
||||||
|
(
|
||||||
|
TFXLNetModel,
|
||||||
|
TFXLNetLMHeadModel,
|
||||||
|
TFXLNetForSequenceClassification,
|
||||||
|
TFXLNetForTokenClassification,
|
||||||
|
TFXLNetForQuestionAnsweringSimple,
|
||||||
|
)
|
||||||
|
if is_tf_available()
|
||||||
|
else ()
|
||||||
|
)
|
||||||
|
all_generative_model_classes = (
|
||||||
|
(TFXLNetLMHeadModel,) if is_tf_available() else ()
|
||||||
|
) # TODO (PVP): Check other models whether language generation is also applicable
|
||||||
|
test_pruning = False
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.model_tester = TFXLNetModelTest.TFXLNetModelTester(self)
|
self.model_tester = TFXLNetModelTester(self)
|
||||||
self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)
|
self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
|
|
|
@ -29,58 +29,30 @@ if is_torch_available():
|
||||||
from transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST
|
from transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST
|
||||||
|
|
||||||
|
|
||||||
@require_torch
|
class TransfoXLModelTester:
|
||||||
class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
|
|
||||||
|
|
||||||
all_model_classes = (TransfoXLModel, TransfoXLLMHeadModel) if is_torch_available() else ()
|
|
||||||
all_generative_model_classes = (TransfoXLLMHeadModel,) if is_torch_available() else ()
|
|
||||||
test_pruning = False
|
|
||||||
test_torchscript = False
|
|
||||||
test_resize_embeddings = True
|
|
||||||
|
|
||||||
class TransfoXLModelTester(object):
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, parent,
|
||||||
parent,
|
|
||||||
batch_size=14,
|
|
||||||
seq_length=7,
|
|
||||||
mem_len=30,
|
|
||||||
clamp_len=15,
|
|
||||||
is_training=True,
|
|
||||||
use_labels=True,
|
|
||||||
vocab_size=99,
|
|
||||||
cutoffs=[10, 50, 80],
|
|
||||||
hidden_size=32,
|
|
||||||
d_embed=32,
|
|
||||||
num_attention_heads=4,
|
|
||||||
d_head=8,
|
|
||||||
d_inner=128,
|
|
||||||
div_val=2,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
scope=None,
|
|
||||||
seed=1,
|
|
||||||
eos_token_id=0,
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = 14
|
||||||
self.seq_length = seq_length
|
self.seq_length = 7
|
||||||
self.mem_len = mem_len
|
self.mem_len = 30
|
||||||
self.key_length = seq_length + mem_len
|
self.key_length = self.seq_length + self.mem_len
|
||||||
self.clamp_len = clamp_len
|
self.clamp_len = 15
|
||||||
self.is_training = is_training
|
self.is_training = True
|
||||||
self.use_labels = use_labels
|
self.use_labels = True
|
||||||
self.vocab_size = vocab_size
|
self.vocab_size = 99
|
||||||
self.cutoffs = cutoffs
|
self.cutoffs = [10, 50, 80]
|
||||||
self.hidden_size = hidden_size
|
self.hidden_size = 32
|
||||||
self.d_embed = d_embed
|
self.d_embed = 32
|
||||||
self.num_attention_heads = num_attention_heads
|
self.num_attention_heads = 4
|
||||||
self.d_head = d_head
|
self.d_head = 8
|
||||||
self.d_inner = d_inner
|
self.d_inner = 128
|
||||||
self.div_val = div_val
|
self.div_val = 2
|
||||||
self.num_hidden_layers = num_hidden_layers
|
self.num_hidden_layers = 5
|
-        self.scope = scope
-        self.seed = seed
-        self.eos_token_id = eos_token_id
+        self.scope = None
+        self.seed = 1
+        self.eos_token_id = 0

     def prepare_config_and_inputs(self):
         input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

@@ -187,6 +159,16 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
         inputs_dict = {"input_ids": input_ids_1}
         return config, inputs_dict

+
+@require_torch
+class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
+
+    all_model_classes = (TransfoXLModel, TransfoXLLMHeadModel) if is_torch_available() else ()
+    all_generative_model_classes = (TransfoXLLMHeadModel,) if is_torch_available() else ()
+    test_pruning = False
+    test_torchscript = False
+    test_resize_embeddings = True
+
     def check_cutoffs_and_n_token(
         self, copied_cutoffs, layer, model_embed, model, model_class, resized_value, vocab_size
     ):

@@ -210,7 +192,7 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
         self.assertEqual(model.crit.n_token, vocab_size + resized_value)

     def setUp(self):
-        self.model_tester = TransfoXLModelTest.TransfoXLModelTester(self)
+        self.model_tester = TransfoXLModelTester(self)
         self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)

     def test_config(self):

@@ -37,87 +37,38 @@ if is_torch_available():
     from transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_LIST


-@require_torch
-class XLMModelTest(ModelTesterMixin, unittest.TestCase):
-
-    all_model_classes = (
-        (
-            XLMModel,
-            XLMWithLMHeadModel,
-            XLMForQuestionAnswering,
-            XLMForSequenceClassification,
-            XLMForQuestionAnsweringSimple,
-        )
-        if is_torch_available()
-        else ()
-    )
-    all_generative_model_classes = (
-        (XLMWithLMHeadModel,) if is_torch_available() else ()
-    )  # TODO (PVP): Check other models whether language generation is also applicable
-
-    class XLMModelTester(object):
+class XLMModelTester:
     def __init__(
-        self,
-        parent,
-        batch_size=13,
-        seq_length=7,
-        is_training=True,
-        use_input_lengths=True,
-        use_token_type_ids=True,
-        use_labels=True,
-        gelu_activation=True,
-        sinusoidal_embeddings=False,
-        causal=False,
-        asm=False,
-        n_langs=2,
-        vocab_size=99,
-        n_special=0,
-        hidden_size=32,
-        num_hidden_layers=5,
-        num_attention_heads=4,
-        hidden_dropout_prob=0.1,
-        attention_probs_dropout_prob=0.1,
-        max_position_embeddings=512,
-        type_vocab_size=16,
-        type_sequence_label_size=2,
-        initializer_range=0.02,
-        num_labels=3,
-        num_choices=4,
-        summary_type="last",
-        use_proj=True,
-        scope=None,
-        bos_token_id=0,
+        self, parent,
     ):
         self.parent = parent
-        self.batch_size = batch_size
-        self.seq_length = seq_length
-        self.is_training = is_training
-        self.use_input_lengths = use_input_lengths
-        self.use_token_type_ids = use_token_type_ids
-        self.use_labels = use_labels
-        self.gelu_activation = gelu_activation
-        self.sinusoidal_embeddings = sinusoidal_embeddings
-        self.asm = asm
-        self.n_langs = n_langs
-        self.vocab_size = vocab_size
-        self.n_special = n_special
-        self.summary_type = summary_type
-        self.causal = causal
-        self.use_proj = use_proj
-        self.hidden_size = hidden_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-        self.hidden_dropout_prob = hidden_dropout_prob
-        self.attention_probs_dropout_prob = attention_probs_dropout_prob
-        self.max_position_embeddings = max_position_embeddings
-        self.n_langs = n_langs
-        self.type_sequence_label_size = type_sequence_label_size
-        self.initializer_range = initializer_range
-        self.summary_type = summary_type
-        self.num_labels = num_labels
-        self.num_choices = num_choices
-        self.scope = scope
-        self.bos_token_id = bos_token_id
+        self.batch_size = 13
+        self.seq_length = 7
+        self.is_training = True
+        self.use_input_lengths = True
+        self.use_token_type_ids = True
+        self.use_labels = True
+        self.gelu_activation = True
+        self.sinusoidal_embeddings = False
+        self.causal = False
+        self.asm = False
+        self.n_langs = 2
+        self.vocab_size = 99
+        self.n_special = 0
+        self.hidden_size = 32
+        self.num_hidden_layers = 5
+        self.num_attention_heads = 4
+        self.hidden_dropout_prob = 0.1
+        self.attention_probs_dropout_prob = 0.1
+        self.max_position_embeddings = 512
+        self.type_sequence_label_size = 2
+        self.initializer_range = 0.02
+        self.num_labels = 3
+        self.num_choices = 4
+        self.summary_type = "last"
+        self.use_proj = True
+        self.scope = None
+        self.bos_token_id = 0

     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

@@ -223,9 +174,7 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
         }

         self.parent.assertListEqual(list(result["loss"].size()), [])
-        self.parent.assertListEqual(
-            list(result["logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]
-        )
+        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.vocab_size])

     def create_and_check_xlm_simple_qa(
         self,

@@ -318,8 +267,7 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
             [self.batch_size, model.config.start_n_top * model.config.end_n_top],
         )
         self.parent.assertListEqual(
-            list(result["end_top_index"].size()),
-            [self.batch_size, model.config.start_n_top * model.config.end_n_top],
+            list(result["end_top_index"].size()), [self.batch_size, model.config.start_n_top * model.config.end_n_top],
         )
         self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])

@@ -347,9 +295,7 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
         }

         self.parent.assertListEqual(list(result["loss"].size()), [])
-        self.parent.assertListEqual(
-            list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size]
-        )
+        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size])

     def create_and_check_xlm_for_token_classification(
         self,

@@ -372,9 +318,7 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
             "loss": loss,
             "logits": logits,
         }
-        self.parent.assertListEqual(
-            list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
-        )
+        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels])
         self.check_loss_output(result)

     def prepare_config_and_inputs_for_common(self):

@@ -392,8 +336,27 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
         inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "lengths": input_lengths}
         return config, inputs_dict

+
+@require_torch
+class XLMModelTest(ModelTesterMixin, unittest.TestCase):
+
+    all_model_classes = (
+        (
+            XLMModel,
+            XLMWithLMHeadModel,
+            XLMForQuestionAnswering,
+            XLMForSequenceClassification,
+            XLMForQuestionAnsweringSimple,
+        )
+        if is_torch_available()
+        else ()
+    )
+    all_generative_model_classes = (
+        (XLMWithLMHeadModel,) if is_torch_available() else ()
+    )  # TODO (PVP): Check other models whether language generation is also applicable
+
     def setUp(self):
-        self.model_tester = XLMModelTest.XLMModelTester(self)
+        self.model_tester = XLMModelTester(self)
         self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)

     def test_config(self):

@@ -39,27 +39,7 @@ if is_torch_available():
     from transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_LIST


-@require_torch
-class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
-
-    all_model_classes = (
-        (
-            XLNetModel,
-            XLNetLMHeadModel,
-            XLNetForTokenClassification,
-            XLNetForSequenceClassification,
-            XLNetForQuestionAnswering,
-            XLNetForMultipleChoice,
-        )
-        if is_torch_available()
-        else ()
-    )
-    all_generative_model_classes = (
-        (XLNetLMHeadModel,) if is_torch_available() else ()
-    )  # TODO (PVP): Check other models whether language generation is also applicable
-    test_pruning = False
-
-    class XLNetModelTester(object):
+class XLNetModelTester:
     def __init__(
         self,
         parent,

@@ -89,31 +69,31 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
         num_choices=4,
     ):
         self.parent = parent
-        self.batch_size = batch_size
-        self.seq_length = seq_length
-        self.mem_len = mem_len
+        self.batch_size = 14
+        self.seq_length = 7
+        self.mem_len = 10
         # self.key_len = seq_length + mem_len
-        self.clamp_len = clamp_len
-        self.reuse_len = reuse_len
-        self.is_training = is_training
-        self.use_labels = use_labels
-        self.vocab_size = vocab_size
-        self.cutoffs = cutoffs
-        self.hidden_size = hidden_size
-        self.num_attention_heads = num_attention_heads
-        self.d_inner = d_inner
-        self.num_hidden_layers = num_hidden_layers
-        self.bi_data = bi_data
-        self.untie_r = untie_r
-        self.same_length = same_length
-        self.initializer_range = initializer_range
-        self.seed = seed
-        self.type_vocab_size = type_vocab_size
-        self.type_sequence_label_size = type_sequence_label_size
-        self.bos_token_id = bos_token_id
-        self.pad_token_id = pad_token_id
-        self.eos_token_id = eos_token_id
-        self.num_choices = num_choices
+        self.clamp_len = -1
+        self.reuse_len = 15
+        self.is_training = True
+        self.use_labels = True
+        self.vocab_size = 99
+        self.cutoffs = [10, 50, 80]
+        self.hidden_size = 32
+        self.num_attention_heads = 4
+        self.d_inner = 128
+        self.num_hidden_layers = 5
+        self.type_sequence_label_size = 2
+        self.untie_r = True
+        self.bi_data = False
+        self.same_length = False
+        self.initializer_range = 0.05
+        self.seed = 1
+        self.type_vocab_size = 2
+        self.bos_token_id = 1
+        self.eos_token_id = 2
+        self.pad_token_id = 5
+        self.num_choices = 4

     def prepare_config_and_inputs(self):
         input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

@@ -126,9 +106,7 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
             self.batch_size, self.seq_length + 1, self.seq_length + 1, dtype=torch.float, device=torch_device,
         )
         perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
-        target_mapping = torch.zeros(
-            self.batch_size, 1, self.seq_length + 1, dtype=torch.float, device=torch_device,
-        )
+        target_mapping = torch.zeros(self.batch_size, 1, self.seq_length + 1, dtype=torch.float, device=torch_device,)
         target_mapping[:, 0, -1] = 1.0  # predict last token

         sequence_labels = None

@@ -270,9 +248,7 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):

         loss_1, all_logits_1, mems_1 = model(input_ids_1, token_type_ids=segment_ids, labels=lm_labels)

-        loss_2, all_logits_2, mems_2 = model(
-            input_ids_2, token_type_ids=segment_ids, labels=lm_labels, mems=mems_1
-        )
+        loss_2, all_logits_2, mems_2 = model(input_ids_2, token_type_ids=segment_ids, labels=lm_labels, mems=mems_1)

         logits, _ = model(input_ids_q, perm_mask=perm_mask, target_mapping=target_mapping)

@@ -370,8 +346,7 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
             [self.batch_size, model.config.start_n_top * model.config.end_n_top],
         )
         self.parent.assertListEqual(
-            list(result["end_top_index"].size()),
-            [self.batch_size, model.config.start_n_top * model.config.end_n_top],
+            list(result["end_top_index"].size()), [self.batch_size, model.config.start_n_top * model.config.end_n_top],
         )
         self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])
         self.parent.assertListEqual(

@@ -472,8 +447,29 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
         inputs_dict = {"input_ids": input_ids_1}
         return config, inputs_dict

+
+@require_torch
+class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
+
+    all_model_classes = (
+        (
+            XLNetModel,
+            XLNetLMHeadModel,
+            XLNetForTokenClassification,
+            XLNetForSequenceClassification,
+            XLNetForQuestionAnswering,
+            XLNetForMultipleChoice,
+        )
+        if is_torch_available()
+        else ()
+    )
+    all_generative_model_classes = (
+        (XLNetLMHeadModel,) if is_torch_available() else ()
+    )  # TODO (PVP): Check other models whether language generation is also applicable
+    test_pruning = False
+
     def setUp(self):
-        self.model_tester = XLNetModelTest.XLNetModelTester(self)
+        self.model_tester = XLNetModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)

     def test_config(self):
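Each of the diffs above applies the same cleanup: the `*ModelTester` helper that used to live inside its `*ModelTest` case is hoisted to module level, its keyword-argument defaults become hard-coded attribute values, and `setUp` instantiates the tester directly instead of going through the enclosing test class. The snippet below is a minimal, self-contained sketch of that pattern for reference only; the `Widget*` names and values are illustrative and are not part of this commit.

import unittest


class WidgetModelTester:
    # Previously a nested class (WidgetModelTest.WidgetModelTester); now module level.
    def __init__(self, parent):
        self.parent = parent
        # Defaults are hard-coded attributes instead of keyword arguments.
        self.batch_size = 13
        self.seq_length = 7

    def prepare_config_and_inputs(self):
        # Stand-in for the ids_tensor()/config plumbing used by the real testers.
        return {"batch_size": self.batch_size, "seq_length": self.seq_length}


class WidgetModelTest(unittest.TestCase):
    def setUp(self):
        # Before the cleanup this read: WidgetModelTest.WidgetModelTester(self)
        self.model_tester = WidgetModelTester(self)

    def test_prepare_config_and_inputs(self):
        inputs = self.model_tester.prepare_config_and_inputs()
        self.assertEqual(inputs["batch_size"], 13)
        self.assertEqual(inputs["seq_length"], 7)


if __name__ == "__main__":
    unittest.main()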