diff --git a/tests/test_modeling_albert.py b/tests/test_modeling_albert.py index c46d27b448..f5c0566bdc 100644 --- a/tests/test_modeling_albert.py +++ b/tests/test_modeling_albert.py @@ -37,6 +37,226 @@ if is_torch_available(): from transformers.modeling_albert import ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST +class AlbertModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_input_mask = True + self.use_token_type_ids = True + self.use_labels = True + self.vocab_size = 99 + self.embedding_size = 16 + self.hidden_size = 36 + self.num_hidden_layers = 6 + self.num_hidden_groups = 6 + self.num_attention_heads = 6 + self.intermediate_size = 37 + self.hidden_act = "gelu" + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = AlbertConfig( + vocab_size=self.vocab_size, + hidden_size=self.hidden_size, + num_hidden_layers=self.num_hidden_layers, + num_attention_heads=self.num_attention_heads, + intermediate_size=self.intermediate_size, + hidden_act=self.hidden_act, + hidden_dropout_prob=self.hidden_dropout_prob, + attention_probs_dropout_prob=self.attention_probs_dropout_prob, + max_position_embeddings=self.max_position_embeddings, + type_vocab_size=self.type_vocab_size, + initializer_range=self.initializer_range, + num_hidden_groups=self.num_hidden_groups, + ) + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def check_loss_output(self, result): + self.parent.assertListEqual(list(result["loss"].size()), []) + + def create_and_check_albert_model( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = AlbertModel(config=config) + model.to(torch_device) + model.eval() + sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) + sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids) + sequence_output, pooled_output = model(input_ids) + + result = { + "sequence_output": sequence_output, + "pooled_output": pooled_output, + } + self.parent.assertListEqual( + list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size] + ) + self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size]) + + def create_and_check_albert_for_pretraining( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = AlbertForPreTraining(config=config) + model.to(torch_device) + model.eval() + 
loss, prediction_scores, sop_scores = model( + input_ids, + attention_mask=input_mask, + token_type_ids=token_type_ids, + labels=token_labels, + sentence_order_label=sequence_labels, + ) + result = { + "loss": loss, + "prediction_scores": prediction_scores, + "sop_scores": sop_scores, + } + self.parent.assertListEqual( + list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size] + ) + self.parent.assertListEqual(list(result["sop_scores"].size()), [self.batch_size, config.num_labels]) + self.check_loss_output(result) + + def create_and_check_albert_for_masked_lm( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = AlbertForMaskedLM(config=config) + model.to(torch_device) + model.eval() + loss, prediction_scores = model( + input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels + ) + result = { + "loss": loss, + "prediction_scores": prediction_scores, + } + self.parent.assertListEqual( + list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size] + ) + self.check_loss_output(result) + + def create_and_check_albert_for_question_answering( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = AlbertForQuestionAnswering(config=config) + model.to(torch_device) + model.eval() + loss, start_logits, end_logits = model( + input_ids, + attention_mask=input_mask, + token_type_ids=token_type_ids, + start_positions=sequence_labels, + end_positions=sequence_labels, + ) + result = { + "loss": loss, + "start_logits": start_logits, + "end_logits": end_logits, + } + self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length]) + self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length]) + self.check_loss_output(result) + + def create_and_check_albert_for_sequence_classification( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = AlbertForSequenceClassification(config) + model.to(torch_device) + model.eval() + loss, logits = model( + input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels + ) + result = { + "loss": loss, + "logits": logits, + } + self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels]) + self.check_loss_output(result) + + def create_and_check_albert_for_token_classification( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = AlbertForTokenClassification(config=config) + model.to(torch_device) + model.eval() + loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) + result = { + "loss": loss, + "logits": logits, + } + self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]) + self.check_loss_output(result) + + def create_and_check_albert_for_multiple_choice( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_choices = self.num_choices + model = AlbertForMultipleChoice(config=config) + model.to(torch_device) + model.eval() + multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() + 
multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() + multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() + loss, logits = model( + multiple_choice_inputs_ids, + attention_mask=multiple_choice_input_mask, + token_type_ids=multiple_choice_token_type_ids, + labels=choice_labels, + ) + result = { + "loss": loss, + "logits": logits, + } + self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_choices]) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} + return config, inputs_dict + + @require_torch class AlbertModelTest(ModelTesterMixin, unittest.TestCase): @@ -54,256 +274,8 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase): else () ) - class AlbertModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - embedding_size=16, - hidden_size=36, - num_hidden_layers=6, - num_hidden_groups=6, - num_attention_heads=6, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.embedding_size = embedding_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - self.num_hidden_groups = num_hidden_groups - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = AlbertConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - 
intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - initializer_range=self.initializer_range, - num_hidden_groups=self.num_hidden_groups, - ) - - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - - def check_loss_output(self, result): - self.parent.assertListEqual(list(result["loss"].size()), []) - - def create_and_check_albert_model( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = AlbertModel(config=config) - model.to(torch_device) - model.eval() - sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids) - sequence_output, pooled_output = model(input_ids) - - result = { - "sequence_output": sequence_output, - "pooled_output": pooled_output, - } - self.parent.assertListEqual( - list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size] - ) - self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size]) - - def create_and_check_albert_for_pretraining( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = AlbertForPreTraining(config=config) - model.to(torch_device) - model.eval() - loss, prediction_scores, sop_scores = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - labels=token_labels, - sentence_order_label=sequence_labels, - ) - result = { - "loss": loss, - "prediction_scores": prediction_scores, - "sop_scores": sop_scores, - } - self.parent.assertListEqual( - list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size] - ) - self.parent.assertListEqual(list(result["sop_scores"].size()), [self.batch_size, config.num_labels]) - self.check_loss_output(result) - - def create_and_check_albert_for_masked_lm( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = AlbertForMaskedLM(config=config) - model.to(torch_device) - model.eval() - loss, prediction_scores = model( - input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels - ) - result = { - "loss": loss, - "prediction_scores": prediction_scores, - } - self.parent.assertListEqual( - list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size] - ) - self.check_loss_output(result) - - def create_and_check_albert_for_question_answering( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = AlbertForQuestionAnswering(config=config) - model.to(torch_device) - model.eval() - loss, start_logits, end_logits = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - start_positions=sequence_labels, - end_positions=sequence_labels, - ) - result = { - "loss": loss, - "start_logits": start_logits, - "end_logits": end_logits, - } - self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length]) - self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length]) - self.check_loss_output(result) - - 
def create_and_check_albert_for_sequence_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = AlbertForSequenceClassification(config) - model.to(torch_device) - model.eval() - loss, logits = model( - input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels - ) - result = { - "loss": loss, - "logits": logits, - } - self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels]) - self.check_loss_output(result) - - def create_and_check_albert_for_token_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = AlbertForTokenClassification(config=config) - model.to(torch_device) - model.eval() - loss, logits = model( - input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels - ) - result = { - "loss": loss, - "logits": logits, - } - self.parent.assertListEqual( - list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels] - ) - self.check_loss_output(result) - - def create_and_check_albert_for_multiple_choice( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_choices = self.num_choices - model = AlbertForMultipleChoice(config=config) - model.to(torch_device) - model.eval() - multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - loss, logits = model( - multiple_choice_inputs_ids, - attention_mask=multiple_choice_input_mask, - token_type_ids=multiple_choice_token_type_ids, - labels=choice_labels, - ) - result = { - "loss": loss, - "logits": logits, - } - self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_choices]) - self.check_loss_output(result) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - def setUp(self): - self.model_tester = AlbertModelTest.AlbertModelTester(self) + self.model_tester = AlbertModelTester(self) self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37) def test_config(self): diff --git a/tests/test_modeling_ctrl.py b/tests/test_modeling_ctrl.py index ede102b922..3e480ae047 100644 --- a/tests/test_modeling_ctrl.py +++ b/tests/test_modeling_ctrl.py @@ -27,6 +27,140 @@ if is_torch_available(): from transformers import CTRLConfig, CTRLModel, CTRL_PRETRAINED_MODEL_ARCHIVE_LIST, CTRLLMHeadModel +class CTRLModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 14 + self.seq_length = 7 + self.is_training = True + self.use_token_type_ids = True + self.use_input_mask = True + self.use_labels = True + self.use_mc_token_ids = True + self.vocab_size = 99 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.intermediate_size = 37 + self.hidden_act = "gelu" + 
self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + mc_token_ids = None + if self.use_mc_token_ids: + mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = CTRLConfig( + vocab_size=self.vocab_size, + n_embd=self.hidden_size, + n_layer=self.num_hidden_layers, + n_head=self.num_attention_heads, + # intermediate_size=self.intermediate_size, + # hidden_act=self.hidden_act, + # hidden_dropout_prob=self.hidden_dropout_prob, + # attention_probs_dropout_prob=self.attention_probs_dropout_prob, + n_positions=self.max_position_embeddings, + n_ctx=self.max_position_embeddings + # type_vocab_size=self.type_vocab_size, + # initializer_range=self.initializer_range + ) + + head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) + + return ( + config, + input_ids, + input_mask, + head_mask, + token_type_ids, + mc_token_ids, + sequence_labels, + token_labels, + choice_labels, + ) + + def check_loss_output(self, result): + self.parent.assertListEqual(list(result["loss"].size()), []) + + def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): + model = CTRLModel(config=config) + model.to(torch_device) + model.eval() + + model(input_ids, token_type_ids=token_type_ids, head_mask=head_mask) + model(input_ids, token_type_ids=token_type_ids) + sequence_output, presents = model(input_ids) + + result = { + "sequence_output": sequence_output, + "presents": presents, + } + self.parent.assertListEqual( + list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size] + ) + self.parent.assertEqual(len(result["presents"]), config.n_layer) + + def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): + model = CTRLLMHeadModel(config) + model.to(torch_device) + model.eval() + + loss, lm_logits, _ = model(input_ids, token_type_ids=token_type_ids, labels=input_ids) + + result = {"loss": loss, "lm_logits": lm_logits} + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual( + list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size] + ) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + + ( + config, + input_ids, + input_mask, + head_mask, + token_type_ids, + mc_token_ids, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "head_mask": head_mask} + + return config, inputs_dict + + @require_torch class 
CTRLModelTest(ModelTesterMixin, unittest.TestCase): @@ -37,164 +171,8 @@ class CTRLModelTest(ModelTesterMixin, unittest.TestCase): test_resize_embeddings = False test_head_masking = False - class CTRLModelTester(object): - def __init__( - self, - parent, - batch_size=14, - seq_length=7, - is_training=True, - use_token_type_ids=True, - use_input_mask=True, - use_labels=True, - use_mc_token_ids=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_token_type_ids = use_token_type_ids - self.use_input_mask = use_input_mask - self.use_labels = use_labels - self.use_mc_token_ids = use_mc_token_ids - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - mc_token_ids = None - if self.use_mc_token_ids: - mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = CTRLConfig( - vocab_size=self.vocab_size, - n_embd=self.hidden_size, - n_layer=self.num_hidden_layers, - n_head=self.num_attention_heads, - # intermediate_size=self.intermediate_size, - # hidden_act=self.hidden_act, - # hidden_dropout_prob=self.hidden_dropout_prob, - # attention_probs_dropout_prob=self.attention_probs_dropout_prob, - n_positions=self.max_position_embeddings, - n_ctx=self.max_position_embeddings - # type_vocab_size=self.type_vocab_size, - # initializer_range=self.initializer_range - ) - - head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) - - return ( - config, - input_ids, - input_mask, - head_mask, - token_type_ids, - mc_token_ids, - sequence_labels, - token_labels, - choice_labels, - ) - - def check_loss_output(self, result): - self.parent.assertListEqual(list(result["loss"].size()), []) - - def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): - model = CTRLModel(config=config) - model.to(torch_device) - model.eval() - 
- model(input_ids, token_type_ids=token_type_ids, head_mask=head_mask) - model(input_ids, token_type_ids=token_type_ids) - sequence_output, presents = model(input_ids) - - result = { - "sequence_output": sequence_output, - "presents": presents, - } - self.parent.assertListEqual( - list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size] - ) - self.parent.assertEqual(len(result["presents"]), config.n_layer) - - def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): - model = CTRLLMHeadModel(config) - model.to(torch_device) - model.eval() - - loss, lm_logits, _ = model(input_ids, token_type_ids=token_type_ids, labels=input_ids) - - result = {"loss": loss, "lm_logits": lm_logits} - self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size] - ) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - - ( - config, - input_ids, - input_mask, - head_mask, - token_type_ids, - mc_token_ids, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "head_mask": head_mask} - - return config, inputs_dict - def setUp(self): - self.model_tester = CTRLModelTest.CTRLModelTester(self) + self.model_tester = CTRLModelTester(self) self.config_tester = ConfigTester(self, config_class=CTRLConfig, n_embd=37) def test_config(self): diff --git a/tests/test_modeling_distilbert.py b/tests/test_modeling_distilbert.py index 2043cac829..14d505abb6 100644 --- a/tests/test_modeling_distilbert.py +++ b/tests/test_modeling_distilbert.py @@ -34,27 +34,6 @@ if is_torch_available(): DistilBertForSequenceClassification, ) - -@require_torch -class DistilBertModelTest(ModelTesterMixin, unittest.TestCase): - - all_model_classes = ( - ( - DistilBertModel, - DistilBertForMaskedLM, - DistilBertForMultipleChoice, - DistilBertForQuestionAnswering, - DistilBertForSequenceClassification, - DistilBertForTokenClassification, - ) - if is_torch_available() - else None - ) - test_pruning = True - test_torchscript = True - test_resize_embeddings = True - test_head_masking = True - class DistilBertModelTester(object): def __init__( self, @@ -245,8 +224,29 @@ class DistilBertModelTest(ModelTesterMixin, unittest.TestCase): inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask} return config, inputs_dict + +@require_torch +class DistilBertModelTest(ModelTesterMixin, unittest.TestCase): + + all_model_classes = ( + ( + DistilBertModel, + DistilBertForMaskedLM, + DistilBertForMultipleChoice, + DistilBertForQuestionAnswering, + DistilBertForSequenceClassification, + DistilBertForTokenClassification, + ) + if is_torch_available() + else None + ) + test_pruning = True + test_torchscript = True + test_resize_embeddings = True + test_head_masking = True + def setUp(self): - self.model_tester = DistilBertModelTest.DistilBertModelTester(self) + self.model_tester = DistilBertModelTester(self) self.config_tester = ConfigTester(self, config_class=DistilBertConfig, dim=37) def test_config(self): diff --git a/tests/test_modeling_electra.py b/tests/test_modeling_electra.py index d09fcc4b61..7a2f231973 100644 --- a/tests/test_modeling_electra.py +++ b/tests/test_modeling_electra.py @@ -36,6 +36,252 @@ if is_torch_available(): from transformers.modeling_electra import 
ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST +class ElectraModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_input_mask = True + self.use_token_type_ids = True + self.use_labels = True + self.vocab_size = 99 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.intermediate_size = 37 + self.hidden_act = "gelu" + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + fake_token_labels = ids_tensor([self.batch_size, self.seq_length], 1) + + config = ElectraConfig( + vocab_size=self.vocab_size, + hidden_size=self.hidden_size, + num_hidden_layers=self.num_hidden_layers, + num_attention_heads=self.num_attention_heads, + intermediate_size=self.intermediate_size, + hidden_act=self.hidden_act, + hidden_dropout_prob=self.hidden_dropout_prob, + attention_probs_dropout_prob=self.attention_probs_dropout_prob, + max_position_embeddings=self.max_position_embeddings, + type_vocab_size=self.type_vocab_size, + is_decoder=False, + initializer_range=self.initializer_range, + ) + + return ( + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + fake_token_labels, + ) + + def check_loss_output(self, result): + self.parent.assertListEqual(list(result["loss"].size()), []) + + def create_and_check_electra_model( + self, + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + fake_token_labels, + ): + model = ElectraModel(config=config) + model.to(torch_device) + model.eval() + (sequence_output,) = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) + (sequence_output,) = model(input_ids, token_type_ids=token_type_ids) + (sequence_output,) = model(input_ids) + + result = { + "sequence_output": sequence_output, + } + self.parent.assertListEqual( + list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size] + ) + + def create_and_check_electra_for_masked_lm( + self, + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + fake_token_labels, + ): + model = ElectraForMaskedLM(config=config) + model.to(torch_device) + model.eval() + loss, prediction_scores = model( + input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels + ) + result = { + "loss": loss, + "prediction_scores": prediction_scores, + } + self.parent.assertListEqual( + list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size] + ) + 
self.check_loss_output(result) + + def create_and_check_electra_for_token_classification( + self, + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + fake_token_labels, + ): + config.num_labels = self.num_labels + model = ElectraForTokenClassification(config=config) + model.to(torch_device) + model.eval() + loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) + result = { + "loss": loss, + "logits": logits, + } + self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]) + self.check_loss_output(result) + + def create_and_check_electra_for_pretraining( + self, + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + fake_token_labels, + ): + config.num_labels = self.num_labels + model = ElectraForPreTraining(config=config) + model.to(torch_device) + model.eval() + loss, logits = model( + input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=fake_token_labels + ) + result = { + "loss": loss, + "logits": logits, + } + self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length]) + self.check_loss_output(result) + + def create_and_check_electra_for_sequence_classification( + self, + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + fake_token_labels, + ): + config.num_labels = self.num_labels + model = ElectraForSequenceClassification(config) + model.to(torch_device) + model.eval() + loss, logits = model( + input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels + ) + result = { + "loss": loss, + "logits": logits, + } + self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels]) + self.check_loss_output(result) + + def create_and_check_electra_for_question_answering( + self, + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + fake_token_labels, + ): + model = ElectraForQuestionAnswering(config=config) + model.to(torch_device) + model.eval() + loss, start_logits, end_logits = model( + input_ids, + attention_mask=input_mask, + token_type_ids=token_type_ids, + start_positions=sequence_labels, + end_positions=sequence_labels, + ) + result = { + "loss": loss, + "start_logits": start_logits, + "end_logits": end_logits, + } + self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length]) + self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length]) + self.check_loss_output(result) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + fake_token_labels, + ) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} + return config, inputs_dict + + @require_torch class ElectraModelTest(ModelTesterMixin, unittest.TestCase): @@ -52,279 +298,8 @@ class ElectraModelTest(ModelTesterMixin, unittest.TestCase): else () ) - class ElectraModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - 
num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - fake_token_labels = ids_tensor([self.batch_size, self.seq_length], 1) - - config = ElectraConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - is_decoder=False, - initializer_range=self.initializer_range, - ) - - return ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - fake_token_labels, - ) - - def check_loss_output(self, result): - self.parent.assertListEqual(list(result["loss"].size()), []) - - def create_and_check_electra_model( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - fake_token_labels, - ): - model = ElectraModel(config=config) - model.to(torch_device) - model.eval() - (sequence_output,) = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - (sequence_output,) = model(input_ids, token_type_ids=token_type_ids) - (sequence_output,) = model(input_ids) - - result = { - "sequence_output": sequence_output, - } - self.parent.assertListEqual( - list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size] - ) - - def create_and_check_electra_for_masked_lm( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - 
choice_labels, - fake_token_labels, - ): - model = ElectraForMaskedLM(config=config) - model.to(torch_device) - model.eval() - loss, prediction_scores = model( - input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels - ) - result = { - "loss": loss, - "prediction_scores": prediction_scores, - } - self.parent.assertListEqual( - list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size] - ) - self.check_loss_output(result) - - def create_and_check_electra_for_token_classification( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - fake_token_labels, - ): - config.num_labels = self.num_labels - model = ElectraForTokenClassification(config=config) - model.to(torch_device) - model.eval() - loss, logits = model( - input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels - ) - result = { - "loss": loss, - "logits": logits, - } - self.parent.assertListEqual( - list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels] - ) - self.check_loss_output(result) - - def create_and_check_electra_for_pretraining( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - fake_token_labels, - ): - config.num_labels = self.num_labels - model = ElectraForPreTraining(config=config) - model.to(torch_device) - model.eval() - loss, logits = model( - input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=fake_token_labels - ) - result = { - "loss": loss, - "logits": logits, - } - self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length]) - self.check_loss_output(result) - - def create_and_check_electra_for_sequence_classification( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - fake_token_labels, - ): - config.num_labels = self.num_labels - model = ElectraForSequenceClassification(config) - model.to(torch_device) - model.eval() - loss, logits = model( - input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels - ) - result = { - "loss": loss, - "logits": logits, - } - self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels]) - self.check_loss_output(result) - - def create_and_check_electra_for_question_answering( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - fake_token_labels, - ): - model = ElectraForQuestionAnswering(config=config) - model.to(torch_device) - model.eval() - loss, start_logits, end_logits = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - start_positions=sequence_labels, - end_positions=sequence_labels, - ) - result = { - "loss": loss, - "start_logits": start_logits, - "end_logits": end_logits, - } - self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length]) - self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length]) - self.check_loss_output(result) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - fake_token_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": 
token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - def setUp(self): - self.model_tester = ElectraModelTest.ElectraModelTester(self) + self.model_tester = ElectraModelTester(self) self.config_tester = ConfigTester(self, config_class=ElectraConfig, hidden_size=37) def test_config(self): diff --git a/tests/test_modeling_flaubert.py b/tests/test_modeling_flaubert.py index 5dabea0c10..07c76d6ec3 100644 --- a/tests/test_modeling_flaubert.py +++ b/tests/test_modeling_flaubert.py @@ -35,6 +35,281 @@ if is_torch_available(): from transformers.modeling_flaubert import FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST +class FlaubertModelTester(object): + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_input_lengths = True + self.use_token_type_ids = True + self.use_labels = True + self.gelu_activation = True + self.sinusoidal_embeddings = False + self.causal = False + self.asm = False + self.n_langs = 2 + self.vocab_size = 99 + self.n_special = 0 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 12 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.summary_type = "last" + self.use_proj = None + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float() + + input_lengths = None + if self.use_input_lengths: + input_lengths = ( + ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2 + ) # small variation of seq_length + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs) + + sequence_labels = None + token_labels = None + is_impossible_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + is_impossible_labels = ids_tensor([self.batch_size], 2).float() + + config = FlaubertConfig( + vocab_size=self.vocab_size, + n_special=self.n_special, + emb_dim=self.hidden_size, + n_layers=self.num_hidden_layers, + n_heads=self.num_attention_heads, + dropout=self.hidden_dropout_prob, + attention_dropout=self.attention_probs_dropout_prob, + gelu_activation=self.gelu_activation, + sinusoidal_embeddings=self.sinusoidal_embeddings, + asm=self.asm, + causal=self.causal, + n_langs=self.n_langs, + max_position_embeddings=self.max_position_embeddings, + initializer_range=self.initializer_range, + summary_type=self.summary_type, + use_proj=self.use_proj, + ) + + return ( + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ) + + def check_loss_output(self, result): + self.parent.assertListEqual(list(result["loss"].size()), []) + + def create_and_check_flaubert_model( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = FlaubertModel(config=config) + model.to(torch_device) + model.eval() + outputs = model(input_ids, lengths=input_lengths, langs=token_type_ids) + outputs = model(input_ids, langs=token_type_ids) + outputs = 
model(input_ids) + sequence_output = outputs[0] + result = { + "sequence_output": sequence_output, + } + self.parent.assertListEqual( + list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size] + ) + + def create_and_check_flaubert_lm_head( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = FlaubertWithLMHeadModel(config) + model.to(torch_device) + model.eval() + + loss, logits = model(input_ids, token_type_ids=token_type_ids, labels=token_labels) + + result = { + "loss": loss, + "logits": logits, + } + + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]) + + def create_and_check_flaubert_simple_qa( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = FlaubertForQuestionAnsweringSimple(config) + model.to(torch_device) + model.eval() + + outputs = model(input_ids) + + outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels) + loss, start_logits, end_logits = outputs + + result = { + "loss": loss, + "start_logits": start_logits, + "end_logits": end_logits, + } + self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length]) + self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length]) + self.check_loss_output(result) + + def create_and_check_flaubert_qa( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = FlaubertForQuestionAnswering(config) + model.to(torch_device) + model.eval() + + outputs = model(input_ids) + start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = outputs + + outputs = model( + input_ids, + start_positions=sequence_labels, + end_positions=sequence_labels, + cls_index=sequence_labels, + is_impossible=is_impossible_labels, + p_mask=input_mask, + ) + + outputs = model( + input_ids, + start_positions=sequence_labels, + end_positions=sequence_labels, + cls_index=sequence_labels, + is_impossible=is_impossible_labels, + ) + + (total_loss,) = outputs + + outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels) + + (total_loss,) = outputs + + result = { + "loss": total_loss, + "start_top_log_probs": start_top_log_probs, + "start_top_index": start_top_index, + "end_top_log_probs": end_top_log_probs, + "end_top_index": end_top_index, + "cls_logits": cls_logits, + } + + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual( + list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top] + ) + self.parent.assertListEqual( + list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top] + ) + self.parent.assertListEqual( + list(result["end_top_log_probs"].size()), + [self.batch_size, model.config.start_n_top * model.config.end_n_top], + ) + self.parent.assertListEqual( + list(result["end_top_index"].size()), [self.batch_size, model.config.start_n_top * model.config.end_n_top], + ) + self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size]) + + def create_and_check_flaubert_sequence_classif( + self, + config, + input_ids, + token_type_ids, + 
input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = FlaubertForSequenceClassification(config) + model.to(torch_device) + model.eval() + + (logits,) = model(input_ids) + loss, logits = model(input_ids, labels=sequence_labels) + + result = { + "loss": loss, + "logits": logits, + } + + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size]) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "lengths": input_lengths} + return config, inputs_dict + + @require_torch class FlaubertModelTest(ModelTesterMixin, unittest.TestCase): @@ -50,316 +325,8 @@ class FlaubertModelTest(ModelTesterMixin, unittest.TestCase): else () ) - class FlaubertModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_lengths=True, - use_token_type_ids=True, - use_labels=True, - gelu_activation=True, - sinusoidal_embeddings=False, - causal=False, - asm=False, - n_langs=2, - vocab_size=99, - n_special=0, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - summary_type="last", - use_proj=True, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_lengths = use_input_lengths - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.gelu_activation = gelu_activation - self.sinusoidal_embeddings = sinusoidal_embeddings - self.asm = asm - self.n_langs = n_langs - self.vocab_size = vocab_size - self.n_special = n_special - self.summary_type = summary_type - self.causal = causal - self.use_proj = use_proj - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.n_langs = n_langs - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.summary_type = summary_type - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float() - - input_lengths = None - if self.use_input_lengths: - input_lengths = ( - ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2 - ) # small variation of seq_length - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs) - - sequence_labels = None - token_labels = None - is_impossible_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], 
self.num_labels) - is_impossible_labels = ids_tensor([self.batch_size], 2).float() - - config = FlaubertConfig( - vocab_size=self.vocab_size, - n_special=self.n_special, - emb_dim=self.hidden_size, - n_layers=self.num_hidden_layers, - n_heads=self.num_attention_heads, - dropout=self.hidden_dropout_prob, - attention_dropout=self.attention_probs_dropout_prob, - gelu_activation=self.gelu_activation, - sinusoidal_embeddings=self.sinusoidal_embeddings, - asm=self.asm, - causal=self.causal, - n_langs=self.n_langs, - max_position_embeddings=self.max_position_embeddings, - initializer_range=self.initializer_range, - summary_type=self.summary_type, - use_proj=self.use_proj, - ) - - return ( - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ) - - def check_loss_output(self, result): - self.parent.assertListEqual(list(result["loss"].size()), []) - - def create_and_check_flaubert_model( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model = FlaubertModel(config=config) - model.to(torch_device) - model.eval() - outputs = model(input_ids, lengths=input_lengths, langs=token_type_ids) - outputs = model(input_ids, langs=token_type_ids) - outputs = model(input_ids) - sequence_output = outputs[0] - result = { - "sequence_output": sequence_output, - } - self.parent.assertListEqual( - list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size] - ) - - def create_and_check_flaubert_lm_head( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model = FlaubertWithLMHeadModel(config) - model.to(torch_device) - model.eval() - - loss, logits = model(input_ids, token_type_ids=token_type_ids, labels=token_labels) - - result = { - "loss": loss, - "logits": logits, - } - - self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["logits"].size()), [self.batch_size, self.seq_length, self.vocab_size] - ) - - def create_and_check_flaubert_simple_qa( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model = FlaubertForQuestionAnsweringSimple(config) - model.to(torch_device) - model.eval() - - outputs = model(input_ids) - - outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels) - loss, start_logits, end_logits = outputs - - result = { - "loss": loss, - "start_logits": start_logits, - "end_logits": end_logits, - } - self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length]) - self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length]) - self.check_loss_output(result) - - def create_and_check_flaubert_qa( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model = FlaubertForQuestionAnswering(config) - model.to(torch_device) - model.eval() - - outputs = model(input_ids) - start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = outputs - - outputs = model( - input_ids, - start_positions=sequence_labels, - end_positions=sequence_labels, - cls_index=sequence_labels, - is_impossible=is_impossible_labels, - p_mask=input_mask, - ) - - outputs = model( 
- input_ids, - start_positions=sequence_labels, - end_positions=sequence_labels, - cls_index=sequence_labels, - is_impossible=is_impossible_labels, - ) - - (total_loss,) = outputs - - outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels) - - (total_loss,) = outputs - - result = { - "loss": total_loss, - "start_top_log_probs": start_top_log_probs, - "start_top_index": start_top_index, - "end_top_log_probs": end_top_log_probs, - "end_top_index": end_top_index, - "cls_logits": cls_logits, - } - - self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top] - ) - self.parent.assertListEqual( - list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top] - ) - self.parent.assertListEqual( - list(result["end_top_log_probs"].size()), - [self.batch_size, model.config.start_n_top * model.config.end_n_top], - ) - self.parent.assertListEqual( - list(result["end_top_index"].size()), - [self.batch_size, model.config.start_n_top * model.config.end_n_top], - ) - self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size]) - - def create_and_check_flaubert_sequence_classif( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model = FlaubertForSequenceClassification(config) - model.to(torch_device) - model.eval() - - (logits,) = model(input_ids) - loss, logits = model(input_ids, labels=sequence_labels) - - result = { - "loss": loss, - "logits": logits, - } - - self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size] - ) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "lengths": input_lengths} - return config, inputs_dict - def setUp(self): - self.model_tester = FlaubertModelTest.FlaubertModelTester(self) + self.model_tester = FlaubertModelTester(self) self.config_tester = ConfigTester(self, config_class=FlaubertConfig, emb_dim=37) def test_config(self): diff --git a/tests/test_modeling_gpt2.py b/tests/test_modeling_gpt2.py index 01228963ac..eb4161f6dc 100644 --- a/tests/test_modeling_gpt2.py +++ b/tests/test_modeling_gpt2.py @@ -34,6 +34,269 @@ if is_torch_available(): ) +class GPT2ModelTester: + def __init__( + self, + parent, + batch_size=14, + seq_length=7, + is_training=True, + use_token_type_ids=True, + use_input_mask=True, + use_labels=True, + use_mc_token_ids=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, + ): + self.parent = parent + self.batch_size = 14 + self.seq_length = 7 + self.is_training = True + self.use_token_type_ids = True + self.use_input_mask = True + self.use_labels = True + self.use_mc_token_ids = True + self.vocab_size = 99 + self.hidden_size = 32 + self.num_hidden_layers = 5 + 
self.num_attention_heads = 4
+        self.intermediate_size = 37
+        self.hidden_act = "gelu"
+        self.hidden_dropout_prob = 0.1
+        self.attention_probs_dropout_prob = 0.1
+        self.max_position_embeddings = 512
+        self.type_vocab_size = 16
+        self.type_sequence_label_size = 2
+        self.initializer_range = 0.02
+        self.num_labels = 3
+        self.num_choices = 4
+        self.scope = None
+        self.bos_token_id = vocab_size - 1
+        self.eos_token_id = vocab_size - 1
+
+    def prepare_config_and_inputs(self):
+        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
+
+        input_mask = None
+        if self.use_input_mask:
+            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+
+        token_type_ids = None
+        if self.use_token_type_ids:
+            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
+
+        mc_token_ids = None
+        if self.use_mc_token_ids:
+            mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length)
+
+        sequence_labels = None
+        token_labels = None
+        choice_labels = None
+        if self.use_labels:
+            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
+            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
+            choice_labels = ids_tensor([self.batch_size], self.num_choices)
+
+        config = GPT2Config(
+            vocab_size=self.vocab_size,
+            n_embd=self.hidden_size,
+            n_layer=self.num_hidden_layers,
+            n_head=self.num_attention_heads,
+            # intermediate_size=self.intermediate_size,
+            # hidden_act=self.hidden_act,
+            # hidden_dropout_prob=self.hidden_dropout_prob,
+            # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
+            n_positions=self.max_position_embeddings,
+            n_ctx=self.max_position_embeddings,
+            # type_vocab_size=self.type_vocab_size,
+            # initializer_range=self.initializer_range
+            bos_token_id=self.bos_token_id,
+            eos_token_id=self.eos_token_id,
+        )
+
+        head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
+
+        return (
+            config,
+            input_ids,
+            input_mask,
+            head_mask,
+            token_type_ids,
+            mc_token_ids,
+            sequence_labels,
+            token_labels,
+            choice_labels,
+        )
+
+    def check_loss_output(self, result):
+        self.parent.assertListEqual(list(result["loss"].size()), [])
+
+    def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
+        model = GPT2Model(config=config)
+        model.to(torch_device)
+        model.eval()
+
+        model(input_ids, token_type_ids=token_type_ids, head_mask=head_mask)
+        model(input_ids, token_type_ids=token_type_ids)
+        sequence_output, presents = model(input_ids)
+
+        result = {
+            "sequence_output": sequence_output,
+            "presents": presents,
+        }
+        self.parent.assertListEqual(
+            list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size],
+        )
+        self.parent.assertEqual(len(result["presents"]), config.n_layer)
+
+    def create_and_check_gpt2_model_past(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
+        model = GPT2Model(config=config)
+        model.to(torch_device)
+        model.eval()
+
+        # first forward pass
+        output, past = model(input_ids, token_type_ids=token_type_ids)
+
+        # create hypothetical next token and extent to next_input_ids
+        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
+        next_token_types = ids_tensor([self.batch_size, 1], self.type_vocab_size)
+
+        # append to next input_ids and token_type_ids
+        next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
+        next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1)
+
+        output_from_no_past, _ =
model(next_input_ids, token_type_ids=next_token_type_ids) + output_from_past, _ = model(next_tokens, token_type_ids=next_token_types, past=past) + + # select random slice + random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() + output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach() + output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach() + + # test that outputs are equal for slice + self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3)) + + def create_and_check_gpt2_model_attention_mask_past( + self, config, input_ids, input_mask, head_mask, token_type_ids, *args + ): + model = GPT2Model(config=config) + model.to(torch_device) + model.eval() + + # create attention mask + attn_mask = torch.ones(input_ids.shape, dtype=torch.long, device=torch_device) + half_seq_length = self.seq_length // 2 + attn_mask[:, half_seq_length:] = 0 + + # first forward pass + output, past = model(input_ids, attention_mask=attn_mask) + + # create hypothetical next token and extent to next_input_ids + next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) + + # change a random masked slice from input_ids + random_seq_idx_to_change = ids_tensor((1,), half_seq_length).item() + 1 + random_other_next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size).squeeze(-1) + input_ids[:, -random_seq_idx_to_change] = random_other_next_tokens + + # append to next input_ids and attn_mask + next_input_ids = torch.cat([input_ids, next_tokens], dim=-1) + attn_mask = torch.cat( + [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)], dim=1, + ) + + # get two different outputs + output_from_no_past, _ = model(next_input_ids, attention_mask=attn_mask) + output_from_past, _ = model(next_tokens, past=past, attention_mask=attn_mask) + + # select random slice + random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() + output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach() + output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach() + + # test that outputs are equal for slice + self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3)) + + def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): + model = GPT2LMHeadModel(config) + model.to(torch_device) + model.eval() + + loss, lm_logits, _ = model(input_ids, token_type_ids=token_type_ids, labels=input_ids) + + result = {"loss": loss, "lm_logits": lm_logits} + + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual( + list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size], + ) + + def create_and_check_double_lm_head_model( + self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args + ): + model = GPT2DoubleHeadsModel(config) + model.to(torch_device) + model.eval() + + multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() + multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() + multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() + + inputs = { + "input_ids": multiple_choice_inputs_ids, + "mc_token_ids": mc_token_ids, + "attention_mask": multiple_choice_input_mask, + "token_type_ids": multiple_choice_token_type_ids, + "labels": 
multiple_choice_inputs_ids, + } + + loss, lm_logits, mc_logits, _ = model(**inputs) + + result = {"loss": loss, "lm_logits": lm_logits, "mc_logits": mc_logits} + + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual( + list(result["lm_logits"].size()), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size], + ) + self.parent.assertListEqual(list(result["mc_logits"].size()), [self.batch_size, self.num_choices]) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + + ( + config, + input_ids, + input_mask, + head_mask, + token_type_ids, + mc_token_ids, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + + inputs_dict = { + "input_ids": input_ids, + "token_type_ids": token_type_ids, + "head_mask": head_mask, + } + + return config, inputs_dict + + @require_torch class GPT2ModelTest(ModelTesterMixin, unittest.TestCase): @@ -42,271 +305,8 @@ class GPT2ModelTest(ModelTesterMixin, unittest.TestCase): (GPT2LMHeadModel,) if is_torch_available() else () ) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly - class GPT2ModelTester(object): - def __init__( - self, - parent, - batch_size=14, - seq_length=7, - is_training=True, - use_token_type_ids=True, - use_input_mask=True, - use_labels=True, - use_mc_token_ids=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_token_type_ids = use_token_type_ids - self.use_input_mask = use_input_mask - self.use_labels = use_labels - self.use_mc_token_ids = use_mc_token_ids - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - self.bos_token_id = vocab_size - 1 - self.eos_token_id = vocab_size - 1 - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - mc_token_ids = None - if self.use_mc_token_ids: - mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = 
ids_tensor([self.batch_size], self.num_choices) - - config = GPT2Config( - vocab_size=self.vocab_size, - n_embd=self.hidden_size, - n_layer=self.num_hidden_layers, - n_head=self.num_attention_heads, - # intermediate_size=self.intermediate_size, - # hidden_act=self.hidden_act, - # hidden_dropout_prob=self.hidden_dropout_prob, - # attention_probs_dropout_prob=self.attention_probs_dropout_prob, - n_positions=self.max_position_embeddings, - n_ctx=self.max_position_embeddings, - # type_vocab_size=self.type_vocab_size, - # initializer_range=self.initializer_range - bos_token_id=self.bos_token_id, - eos_token_id=self.eos_token_id, - ) - - head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) - - return ( - config, - input_ids, - input_mask, - head_mask, - token_type_ids, - mc_token_ids, - sequence_labels, - token_labels, - choice_labels, - ) - - def check_loss_output(self, result): - self.parent.assertListEqual(list(result["loss"].size()), []) - - def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): - model = GPT2Model(config=config) - model.to(torch_device) - model.eval() - - model(input_ids, token_type_ids=token_type_ids, head_mask=head_mask) - model(input_ids, token_type_ids=token_type_ids) - sequence_output, presents = model(input_ids) - - result = { - "sequence_output": sequence_output, - "presents": presents, - } - self.parent.assertListEqual( - list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size], - ) - self.parent.assertEqual(len(result["presents"]), config.n_layer) - - def create_and_check_gpt2_model_past(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): - model = GPT2Model(config=config) - model.to(torch_device) - model.eval() - - # first forward pass - output, past = model(input_ids, token_type_ids=token_type_ids) - - # create hypothetical next token and extent to next_input_ids - next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) - next_token_types = ids_tensor([self.batch_size, 1], self.type_vocab_size) - - # append to next input_ids and token_type_ids - next_input_ids = torch.cat([input_ids, next_tokens], dim=-1) - next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1) - - output_from_no_past, _ = model(next_input_ids, token_type_ids=next_token_type_ids) - output_from_past, _ = model(next_tokens, token_type_ids=next_token_types, past=past) - - # select random slice - random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() - output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach() - output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach() - - # test that outputs are equal for slice - self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3)) - - def create_and_check_gpt2_model_attention_mask_past( - self, config, input_ids, input_mask, head_mask, token_type_ids, *args - ): - model = GPT2Model(config=config) - model.to(torch_device) - model.eval() - - # create attention mask - attn_mask = torch.ones(input_ids.shape, dtype=torch.long, device=torch_device) - half_seq_length = self.seq_length // 2 - attn_mask[:, half_seq_length:] = 0 - - # first forward pass - output, past = model(input_ids, attention_mask=attn_mask) - - # create hypothetical next token and extent to next_input_ids - next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) - - # change a random masked slice from input_ids - 
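For context, a minimal sketch of the caching equivalence that these `*_model_past` checks assert: running the full sequence in one pass and running only the newest token together with the cached `past` should produce the same hidden state for that token. The small config values below are arbitrary illustration choices, and the tuple-style outputs mirror the API as it is used in these tests.

    import torch
    from transformers import GPT2Config, GPT2Model

    config = GPT2Config(vocab_size=99, n_embd=32, n_layer=2, n_head=4, n_positions=64, n_ctx=64)
    model = GPT2Model(config)
    model.eval()

    input_ids = torch.randint(0, config.vocab_size, (1, 7))
    next_token = torch.randint(0, config.vocab_size, (1, 1))

    with torch.no_grad():
        # full pass over prefix plus the new token
        full_output, _ = model(torch.cat([input_ids, next_token], dim=-1))
        # incremental pass: cache the prefix, then feed only the new token
        _, past = model(input_ids)
        incremental_output, _ = model(next_token, past=past)

    # the new token's hidden state should match between the two paths
    assert torch.allclose(full_output[:, -1], incremental_output[:, 0], atol=1e-3)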
random_seq_idx_to_change = ids_tensor((1,), half_seq_length).item() + 1 - random_other_next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size).squeeze(-1) - input_ids[:, -random_seq_idx_to_change] = random_other_next_tokens - - # append to next input_ids and attn_mask - next_input_ids = torch.cat([input_ids, next_tokens], dim=-1) - attn_mask = torch.cat( - [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)], dim=1, - ) - - # get two different outputs - output_from_no_past, _ = model(next_input_ids, attention_mask=attn_mask) - output_from_past, _ = model(next_tokens, past=past, attention_mask=attn_mask) - - # select random slice - random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() - output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach() - output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach() - - # test that outputs are equal for slice - self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3)) - - def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): - model = GPT2LMHeadModel(config) - model.to(torch_device) - model.eval() - - loss, lm_logits, _ = model(input_ids, token_type_ids=token_type_ids, labels=input_ids) - - result = {"loss": loss, "lm_logits": lm_logits} - - self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size], - ) - - def create_and_check_double_lm_head_model( - self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args - ): - model = GPT2DoubleHeadsModel(config) - model.to(torch_device) - model.eval() - - multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - - inputs = { - "input_ids": multiple_choice_inputs_ids, - "mc_token_ids": mc_token_ids, - "attention_mask": multiple_choice_input_mask, - "token_type_ids": multiple_choice_token_type_ids, - "labels": multiple_choice_inputs_ids, - } - - loss, lm_logits, mc_logits, _ = model(**inputs) - - result = {"loss": loss, "lm_logits": lm_logits, "mc_logits": mc_logits} - - self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["lm_logits"].size()), - [self.batch_size, self.num_choices, self.seq_length, self.vocab_size], - ) - self.parent.assertListEqual(list(result["mc_logits"].size()), [self.batch_size, self.num_choices]) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - - ( - config, - input_ids, - input_mask, - head_mask, - token_type_ids, - mc_token_ids, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - - inputs_dict = { - "input_ids": input_ids, - "token_type_ids": token_type_ids, - "head_mask": head_mask, - } - - return config, inputs_dict - def setUp(self): - self.model_tester = GPT2ModelTest.GPT2ModelTester(self) + self.model_tester = GPT2ModelTester(self) self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37) def test_config(self): diff --git a/tests/test_modeling_longformer.py b/tests/test_modeling_longformer.py index d1c7beca94..7579ee38ba 100644 --- 
a/tests/test_modeling_longformer.py +++ b/tests/test_modeling_longformer.py @@ -36,56 +36,33 @@ if is_torch_available(): ) -class LongformerModelTester(object): +class LongformerModelTester: def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - attention_window=4, + self, parent, ): self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - self.attention_window = attention_window + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_input_mask = True + self.use_token_type_ids = True + self.use_labels = True + self.vocab_size = 99 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.intermediate_size = 37 + self.hidden_act = "gelu" + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + self.attention_window = 4 # `ModelTesterMixin.test_attention_outputs` is expecting attention tensors to be of size # [num_attention_heads, encoder_seq_length, encoder_key_length], but LongformerSelfAttention diff --git a/tests/test_modeling_openai.py b/tests/test_modeling_openai.py index d56f4627a6..89dc0dfe0d 100644 --- a/tests/test_modeling_openai.py +++ b/tests/test_modeling_openai.py @@ -34,6 +34,139 @@ if is_torch_available(): ) +class OpenAIGPTModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_token_type_ids = True + self.use_labels = True + self.vocab_size = 99 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.intermediate_size = 37 + self.hidden_act = "gelu" + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = 
ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = OpenAIGPTConfig( + vocab_size=self.vocab_size, + n_embd=self.hidden_size, + n_layer=self.num_hidden_layers, + n_head=self.num_attention_heads, + # intermediate_size=self.intermediate_size, + # hidden_act=self.hidden_act, + # hidden_dropout_prob=self.hidden_dropout_prob, + # attention_probs_dropout_prob=self.attention_probs_dropout_prob, + n_positions=self.max_position_embeddings, + n_ctx=self.max_position_embeddings + # type_vocab_size=self.type_vocab_size, + # initializer_range=self.initializer_range + ) + + head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) + + return ( + config, + input_ids, + head_mask, + token_type_ids, + sequence_labels, + token_labels, + choice_labels, + ) + + def check_loss_output(self, result): + self.parent.assertListEqual(list(result["loss"].size()), []) + + def create_and_check_openai_gpt_model(self, config, input_ids, head_mask, token_type_ids, *args): + model = OpenAIGPTModel(config=config) + model.to(torch_device) + model.eval() + + model(input_ids, token_type_ids=token_type_ids, head_mask=head_mask) + model(input_ids, token_type_ids=token_type_ids) + (sequence_output,) = model(input_ids) + + result = {"sequence_output": sequence_output} + self.parent.assertListEqual( + list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size], + ) + + def create_and_check_lm_head_model(self, config, input_ids, head_mask, token_type_ids, *args): + model = OpenAIGPTLMHeadModel(config) + model.to(torch_device) + model.eval() + + loss, lm_logits = model(input_ids, token_type_ids=token_type_ids, labels=input_ids) + + result = {"loss": loss, "lm_logits": lm_logits} + + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual( + list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size], + ) + + def create_and_check_double_lm_head_model(self, config, input_ids, head_mask, token_type_ids, *args): + model = OpenAIGPTDoubleHeadsModel(config) + model.to(torch_device) + model.eval() + + loss, lm_logits, mc_logits = model(input_ids, token_type_ids=token_type_ids, labels=input_ids) + + result = {"loss": loss, "lm_logits": lm_logits} + + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual( + list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size], + ) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids, + head_mask, + token_type_ids, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + inputs_dict = { + "input_ids": input_ids, + "token_type_ids": token_type_ids, + "head_mask": head_mask, + } + + return config, inputs_dict + + @require_torch class OpenAIGPTModelTest(ModelTesterMixin, unittest.TestCase): @@ -44,161 +177,8 @@ class OpenAIGPTModelTest(ModelTesterMixin, unittest.TestCase): (OpenAIGPTLMHeadModel,) if is_torch_available() else () ) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly - class OpenAIGPTModelTester(object): - def __init__( - self, - parent, - 
batch_size=13, - seq_length=7, - is_training=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = OpenAIGPTConfig( - vocab_size=self.vocab_size, - n_embd=self.hidden_size, - n_layer=self.num_hidden_layers, - n_head=self.num_attention_heads, - # intermediate_size=self.intermediate_size, - # hidden_act=self.hidden_act, - # hidden_dropout_prob=self.hidden_dropout_prob, - # attention_probs_dropout_prob=self.attention_probs_dropout_prob, - n_positions=self.max_position_embeddings, - n_ctx=self.max_position_embeddings - # type_vocab_size=self.type_vocab_size, - # initializer_range=self.initializer_range - ) - - head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) - - return ( - config, - input_ids, - head_mask, - token_type_ids, - sequence_labels, - token_labels, - choice_labels, - ) - - def check_loss_output(self, result): - self.parent.assertListEqual(list(result["loss"].size()), []) - - def create_and_check_openai_gpt_model(self, config, input_ids, head_mask, token_type_ids, *args): - model = OpenAIGPTModel(config=config) - model.to(torch_device) - model.eval() - - model(input_ids, token_type_ids=token_type_ids, head_mask=head_mask) - model(input_ids, token_type_ids=token_type_ids) - (sequence_output,) = model(input_ids) - - result = {"sequence_output": sequence_output} - self.parent.assertListEqual( - list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size], - ) - - def create_and_check_lm_head_model(self, config, input_ids, head_mask, token_type_ids, *args): - model = OpenAIGPTLMHeadModel(config) - model.to(torch_device) - model.eval() - - loss, lm_logits = model(input_ids, token_type_ids=token_type_ids, labels=input_ids) - - result = {"loss": loss, "lm_logits": lm_logits} - - 
self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size], - ) - - def create_and_check_double_lm_head_model(self, config, input_ids, head_mask, token_type_ids, *args): - model = OpenAIGPTDoubleHeadsModel(config) - model.to(torch_device) - model.eval() - - loss, lm_logits, mc_logits = model(input_ids, token_type_ids=token_type_ids, labels=input_ids) - - result = {"loss": loss, "lm_logits": lm_logits} - - self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size], - ) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - head_mask, - token_type_ids, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - inputs_dict = { - "input_ids": input_ids, - "token_type_ids": token_type_ids, - "head_mask": head_mask, - } - - return config, inputs_dict - def setUp(self): - self.model_tester = OpenAIGPTModelTest.OpenAIGPTModelTester(self) + self.model_tester = OpenAIGPTModelTester(self) self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37) def test_config(self): diff --git a/tests/test_modeling_roberta.py b/tests/test_modeling_roberta.py index dbfb6e6ba4..e096ad4f29 100644 --- a/tests/test_modeling_roberta.py +++ b/tests/test_modeling_roberta.py @@ -39,6 +39,183 @@ if is_torch_available(): from transformers.modeling_utils import create_position_ids_from_input_ids +class RobertaModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_input_mask = True + self.use_token_type_ids = True + self.use_labels = True + self.vocab_size = 99 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.intermediate_size = 37 + self.hidden_act = "gelu" + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = RobertaConfig( + vocab_size=self.vocab_size, + hidden_size=self.hidden_size, + num_hidden_layers=self.num_hidden_layers, + num_attention_heads=self.num_attention_heads, + intermediate_size=self.intermediate_size, + hidden_act=self.hidden_act, + hidden_dropout_prob=self.hidden_dropout_prob, + attention_probs_dropout_prob=self.attention_probs_dropout_prob, + max_position_embeddings=self.max_position_embeddings, + type_vocab_size=self.type_vocab_size, + initializer_range=self.initializer_range, + ) + + 
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def check_loss_output(self, result): + self.parent.assertListEqual(list(result["loss"].size()), []) + + def create_and_check_roberta_model( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = RobertaModel(config=config) + model.to(torch_device) + model.eval() + sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) + sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids) + sequence_output, pooled_output = model(input_ids) + + result = { + "sequence_output": sequence_output, + "pooled_output": pooled_output, + } + self.parent.assertListEqual( + list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size] + ) + self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size]) + + def create_and_check_roberta_for_masked_lm( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = RobertaForMaskedLM(config=config) + model.to(torch_device) + model.eval() + loss, prediction_scores = model( + input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels + ) + result = { + "loss": loss, + "prediction_scores": prediction_scores, + } + self.parent.assertListEqual( + list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size] + ) + self.check_loss_output(result) + + def create_and_check_roberta_for_token_classification( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = RobertaForTokenClassification(config=config) + model.to(torch_device) + model.eval() + loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) + result = { + "loss": loss, + "logits": logits, + } + self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]) + self.check_loss_output(result) + + def create_and_check_roberta_for_multiple_choice( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_choices = self.num_choices + model = RobertaForMultipleChoice(config=config) + model.to(torch_device) + model.eval() + multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() + multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() + multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() + loss, logits = model( + multiple_choice_inputs_ids, + attention_mask=multiple_choice_input_mask, + token_type_ids=multiple_choice_token_type_ids, + labels=choice_labels, + ) + result = { + "loss": loss, + "logits": logits, + } + self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_choices]) + self.check_loss_output(result) + + def create_and_check_roberta_for_question_answering( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = RobertaForQuestionAnswering(config=config) + model.to(torch_device) + model.eval() + loss, start_logits, end_logits = model( + input_ids, + attention_mask=input_mask, + token_type_ids=token_type_ids, + 
start_positions=sequence_labels, + end_positions=sequence_labels, + ) + result = { + "loss": loss, + "start_logits": start_logits, + "end_logits": end_logits, + } + self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length]) + self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length]) + self.check_loss_output(result) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} + return config, inputs_dict + + @require_torch class RobertaModelTest(ModelTesterMixin, unittest.TestCase): @@ -55,210 +232,8 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase): else () ) - class RobertaModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = RobertaConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - 
initializer_range=self.initializer_range, - ) - - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - - def check_loss_output(self, result): - self.parent.assertListEqual(list(result["loss"].size()), []) - - def create_and_check_roberta_model( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = RobertaModel(config=config) - model.to(torch_device) - model.eval() - sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) - sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids) - sequence_output, pooled_output = model(input_ids) - - result = { - "sequence_output": sequence_output, - "pooled_output": pooled_output, - } - self.parent.assertListEqual( - list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size] - ) - self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size]) - - def create_and_check_roberta_for_masked_lm( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = RobertaForMaskedLM(config=config) - model.to(torch_device) - model.eval() - loss, prediction_scores = model( - input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels - ) - result = { - "loss": loss, - "prediction_scores": prediction_scores, - } - self.parent.assertListEqual( - list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size] - ) - self.check_loss_output(result) - - def create_and_check_roberta_for_token_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = RobertaForTokenClassification(config=config) - model.to(torch_device) - model.eval() - loss, logits = model( - input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels - ) - result = { - "loss": loss, - "logits": logits, - } - self.parent.assertListEqual( - list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels] - ) - self.check_loss_output(result) - - def create_and_check_roberta_for_multiple_choice( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_choices = self.num_choices - model = RobertaForMultipleChoice(config=config) - model.to(torch_device) - model.eval() - multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - loss, logits = model( - multiple_choice_inputs_ids, - attention_mask=multiple_choice_input_mask, - token_type_ids=multiple_choice_token_type_ids, - labels=choice_labels, - ) - result = { - "loss": loss, - "logits": logits, - } - self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_choices]) - self.check_loss_output(result) - - def create_and_check_roberta_for_question_answering( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = RobertaForQuestionAnswering(config=config) - model.to(torch_device) - model.eval() - loss, start_logits, end_logits = model( - input_ids, - 
attention_mask=input_mask, - token_type_ids=token_type_ids, - start_positions=sequence_labels, - end_positions=sequence_labels, - ) - result = { - "loss": loss, - "start_logits": start_logits, - "end_logits": end_logits, - } - self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length]) - self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length]) - self.check_loss_output(result) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - def setUp(self): - self.model_tester = RobertaModelTest.RobertaModelTester(self) + self.model_tester = RobertaModelTester(self) self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37) def test_config(self): diff --git a/tests/test_modeling_t5.py b/tests/test_modeling_t5.py index 766bda3bcb..dd480873c7 100644 --- a/tests/test_modeling_t5.py +++ b/tests/test_modeling_t5.py @@ -30,6 +30,268 @@ if is_torch_available(): from transformers.tokenization_t5 import T5Tokenizer +class T5ModelTester: + def __init__(self, parent): + self.parent = parent + self.batch_size = 13 + self.encoder_seq_length = 7 + self.decoder_seq_length = 9 + self.is_training = True + self.use_attention_mask = True + self.use_labels = True + self.vocab_size = 99 + self.n_positions = 14 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.d_ff = 37 + self.relative_attention_num_buckets = 8 + self.dropout_rate = 0.1 + self.initializer_factor = 0.002 + self.eos_token_id = 1 + self.pad_token_id = 0 + self.decoder_start_token_id = 0 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size) + decoder_input_ids = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size) + + attention_mask = None + decoder_attention_mask = None + if self.use_attention_mask: + attention_mask = ids_tensor([self.batch_size, self.encoder_seq_length], vocab_size=2) + decoder_attention_mask = ids_tensor([self.batch_size, self.decoder_seq_length], vocab_size=2) + + lm_labels = None + if self.use_labels: + lm_labels = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size) + + config = T5Config( + vocab_size=self.vocab_size, + n_positions=self.n_positions, + d_model=self.hidden_size, + d_ff=self.d_ff, + d_kv=self.hidden_size // self.num_attention_heads, + num_layers=self.num_hidden_layers, + num_heads=self.num_attention_heads, + relative_attention_num_buckets=self.relative_attention_num_buckets, + dropout_rate=self.dropout_rate, + initializer_factor=self.initializer_factor, + eos_token_id=self.eos_token_id, + bos_token_id=self.pad_token_id, + pad_token_id=self.pad_token_id, + decoder_start_token_id=self.decoder_start_token_id, + ) + + return ( + config, + input_ids, + decoder_input_ids, + attention_mask, + decoder_attention_mask, + lm_labels, + ) + + def check_loss_output(self, result): + self.parent.assertListEqual(list(result["loss"].size()), []) + + def check_prepare_lm_labels_via_shift_left( + self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, + ): + model = T5Model(config=config) + model.to(torch_device) 
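# The assertions that follow pin down the `_shift_right` convention used to build
# decoder inputs from labels: the configured decoder_start_token_id (0 above) is
# prepended and the final label is dropped. As a worked illustration with arbitrary
# token ids, labels [[5, 6, 7]] are expected to become decoder inputs [[0, 5, 6]].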
+ model.eval() + + # make sure that lm_labels are correctly padded from the right + lm_labels.masked_fill_((lm_labels == self.decoder_start_token_id), self.eos_token_id) + + # add casaul pad token mask + triangular_mask = torch.tril(lm_labels.new_ones(lm_labels.shape)).logical_not() + lm_labels.masked_fill_(triangular_mask, self.pad_token_id) + decoder_input_ids = model._shift_right(lm_labels) + + for i, (decoder_input_ids_slice, lm_labels_slice) in enumerate(zip(decoder_input_ids, lm_labels)): + # first item + self.parent.assertEqual(decoder_input_ids_slice[0].item(), self.decoder_start_token_id) + if i < decoder_input_ids_slice.shape[-1]: + if i < decoder_input_ids.shape[-1] - 1: + # items before diagonal + self.parent.assertListEqual( + decoder_input_ids_slice[1 : i + 1].tolist(), lm_labels_slice[:i].tolist() + ) + # pad items after diagonal + if i < decoder_input_ids.shape[-1] - 2: + self.parent.assertListEqual( + decoder_input_ids_slice[i + 2 :].tolist(), lm_labels_slice[i + 1 : -1].tolist() + ) + else: + # all items after square + self.parent.assertListEqual(decoder_input_ids_slice[1:].tolist(), lm_labels_slice[:-1].tolist()) + + def create_and_check_t5_model( + self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, + ): + model = T5Model(config=config) + model.to(torch_device) + model.eval() + decoder_output, decoder_past, encoder_output = model( + input_ids=input_ids, + decoder_input_ids=decoder_input_ids, + attention_mask=attention_mask, + decoder_attention_mask=decoder_attention_mask, + ) + decoder_output, decoder_past, encoder_output = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids) + + result = { + "encoder_output": encoder_output, + "decoder_output": decoder_output, + "decoder_past": decoder_past, + } + self.parent.assertListEqual( + list(result["encoder_output"].size()), [self.batch_size, self.encoder_seq_length, self.hidden_size] + ) + self.parent.assertListEqual( + list(result["decoder_output"].size()), [self.batch_size, self.decoder_seq_length, self.hidden_size] + ) + self.parent.assertEqual(len(decoder_past), 2) + # decoder_past[0] should correspond to encoder output + self.parent.assertTrue(torch.all(decoder_past[0][0] == encoder_output)) + # There should be `num_layers` key value embeddings stored in decoder_past[1] + self.parent.assertEqual(len(decoder_past[1]), config.num_layers) + # There should be a self attn key, a self attn value, a cross attn key and a cross attn value stored in each decoder_past[1] tuple + self.parent.assertEqual(len(decoder_past[1][0]), 4) + + def create_and_check_t5_with_lm_head( + self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, + ): + model = T5ForConditionalGeneration(config=config) + model.to(torch_device) + model.eval() + outputs = model( + input_ids=input_ids, + decoder_input_ids=decoder_input_ids, + decoder_attention_mask=decoder_attention_mask, + labels=lm_labels, + ) + loss, prediction_scores, _, _ = outputs + self.parent.assertEqual(len(outputs), 4) + result = { + "loss": loss, + "prediction_scores": prediction_scores, + } + self.parent.assertListEqual( + list(result["prediction_scores"].size()), [self.batch_size, self.decoder_seq_length, self.vocab_size] + ) + self.check_loss_output(result) + + def create_and_check_t5_decoder_model_past( + self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, + ): + model = T5Model(config=config).get_decoder() + model.to(torch_device) + model.eval() + + # 
first forward pass + output, past_key_value_states = model(input_ids, use_cache=True) + + # create hypothetical next token and extent to next_input_ids + next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) + + # append to next input_ids and + next_input_ids = torch.cat([input_ids, next_tokens], dim=-1) + + output_from_no_past = model(next_input_ids)[0] + output_from_past = model(next_tokens, past_key_value_states=past_key_value_states)[0] + + # select random slice + random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() + output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach() + output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach() + + # test that outputs are equal for slice + self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3)) + + def create_and_check_t5_decoder_model_attention_mask_past( + self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, + ): + model = T5Model(config=config).get_decoder() + model.to(torch_device) + model.eval() + + # create attention mask + attn_mask = torch.ones(input_ids.shape, dtype=torch.long, device=torch_device) + + half_seq_length = input_ids.shape[-1] // 2 + attn_mask[:, half_seq_length:] = 0 + + # first forward pass + output, past_key_value_states = model(input_ids, attention_mask=attn_mask, use_cache=True) + + # create hypothetical next token and extent to next_input_ids + next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) + + # change a random masked slice from input_ids + random_seq_idx_to_change = ids_tensor((1,), half_seq_length).item() + 1 + random_other_next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size).squeeze(-1) + input_ids[:, -random_seq_idx_to_change] = random_other_next_tokens + + # append to next input_ids and attn_mask + next_input_ids = torch.cat([input_ids, next_tokens], dim=-1) + attn_mask = torch.cat( + [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)], dim=1, + ) + + # get two different outputs + output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0] + output_from_past = model(next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask)[0] + + # select random slice + random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() + output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach() + output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach() + + # test that outputs are equal for slice + self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3)) + + def create_t5_and_check_t5_generate_with_past_key_value_states( + self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, + ): + model = T5ForConditionalGeneration(config=config) + model.to(torch_device) + model.eval() + torch.manual_seed(0) + output_without_past_cache = model.generate( + input_ids[:1], num_beams=2, max_length=5, do_sample=True, use_cache=False + ) + torch.manual_seed(0) + output_with_past_cache = model.generate(input_ids[:1], num_beams=2, max_length=5, do_sample=True) + self.parent.assertTrue(torch.all(output_with_past_cache == output_without_past_cache)) + + def create_and_check_t5_model_fp16_forward( + self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, + ): + model = T5Model(config=config) + model.to(torch_device) + 
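# Note on the half-precision check below: it is only a smoke test that casts the
# model to fp16 and asserts the forward pass returns finite values; a NaN here
# would typically point at an overflow (for example in the attention logits). It
# is mainly meaningful on a CUDA device, since fp16 kernel coverage on CPU is limited.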
model.half() + model.eval() + output = model(input_ids, decoder_input_ids=input_ids, attention_mask=attention_mask)[0] + self.parent.assertFalse(torch.isnan(output).any().item()) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + (config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,) = config_and_inputs + + inputs_dict = { + "input_ids": input_ids, + "attention_mask": attention_mask, + "decoder_input_ids": decoder_input_ids, + "decoder_attention_mask": decoder_attention_mask, + "use_cache": False, + } + return config, inputs_dict + + @require_torch class T5ModelTest(ModelTesterMixin, unittest.TestCase): @@ -40,302 +302,8 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase): test_resize_embeddings = False is_encoder_decoder = True - class T5ModelTester(object): - def __init__( - self, - parent, - batch_size=13, - encoder_seq_length=7, - decoder_seq_length=9, - is_training=True, - use_attention_mask=True, - use_labels=True, - vocab_size=99, - n_positions=14, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - d_ff=37, - relative_attention_num_buckets=8, - dropout_rate=0.1, - initializer_factor=0.002, - eos_token_id=1, - pad_token_id=0, - decoder_start_token_id=0, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.encoder_seq_length = encoder_seq_length - self.decoder_seq_length = decoder_seq_length - self.is_training = is_training - self.use_attention_mask = use_attention_mask - self.use_labels = use_labels - self.vocab_size = vocab_size - self.n_positions = n_positions - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.d_ff = d_ff - self.relative_attention_num_buckets = relative_attention_num_buckets - self.dropout_rate = dropout_rate - self.initializer_factor = initializer_factor - self.scope = scope - self.eos_token_id = eos_token_id - self.pad_token_id = pad_token_id - self.decoder_start_token_id = decoder_start_token_id - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size) - decoder_input_ids = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size) - - attention_mask = None - decoder_attention_mask = None - if self.use_attention_mask: - attention_mask = ids_tensor([self.batch_size, self.encoder_seq_length], vocab_size=2) - decoder_attention_mask = ids_tensor([self.batch_size, self.decoder_seq_length], vocab_size=2) - - lm_labels = None - if self.use_labels: - lm_labels = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size) - - config = T5Config( - vocab_size=self.vocab_size, - n_positions=self.n_positions, - d_model=self.hidden_size, - d_ff=self.d_ff, - d_kv=self.hidden_size // self.num_attention_heads, - num_layers=self.num_hidden_layers, - num_heads=self.num_attention_heads, - relative_attention_num_buckets=self.relative_attention_num_buckets, - dropout_rate=self.dropout_rate, - initializer_factor=self.initializer_factor, - eos_token_id=self.eos_token_id, - bos_token_id=self.pad_token_id, - pad_token_id=self.pad_token_id, - decoder_start_token_id=self.decoder_start_token_id, - ) - - return ( - config, - input_ids, - decoder_input_ids, - attention_mask, - decoder_attention_mask, - lm_labels, - ) - - def check_loss_output(self, result): - self.parent.assertListEqual(list(result["loss"].size()), []) - - def check_prepare_lm_labels_via_shift_left( - self, 
config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, - ): - model = T5Model(config=config) - model.to(torch_device) - model.eval() - - # make sure that lm_labels are correctly padded from the right - lm_labels.masked_fill_((lm_labels == self.decoder_start_token_id), self.eos_token_id) - - # add casaul pad token mask - triangular_mask = torch.tril(lm_labels.new_ones(lm_labels.shape)).logical_not() - lm_labels.masked_fill_(triangular_mask, self.pad_token_id) - decoder_input_ids = model._shift_right(lm_labels) - - for i, (decoder_input_ids_slice, lm_labels_slice) in enumerate(zip(decoder_input_ids, lm_labels)): - # first item - self.parent.assertEqual(decoder_input_ids_slice[0].item(), self.decoder_start_token_id) - if i < decoder_input_ids_slice.shape[-1]: - if i < decoder_input_ids.shape[-1] - 1: - # items before diagonal - self.parent.assertListEqual( - decoder_input_ids_slice[1 : i + 1].tolist(), lm_labels_slice[:i].tolist() - ) - # pad items after diagonal - if i < decoder_input_ids.shape[-1] - 2: - self.parent.assertListEqual( - decoder_input_ids_slice[i + 2 :].tolist(), lm_labels_slice[i + 1 : -1].tolist() - ) - else: - # all items after square - self.parent.assertListEqual(decoder_input_ids_slice[1:].tolist(), lm_labels_slice[:-1].tolist()) - - def create_and_check_t5_model( - self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, - ): - model = T5Model(config=config) - model.to(torch_device) - model.eval() - decoder_output, decoder_past, encoder_output = model( - input_ids=input_ids, - decoder_input_ids=decoder_input_ids, - attention_mask=attention_mask, - decoder_attention_mask=decoder_attention_mask, - ) - decoder_output, decoder_past, encoder_output = model( - input_ids=input_ids, decoder_input_ids=decoder_input_ids - ) - - result = { - "encoder_output": encoder_output, - "decoder_output": decoder_output, - "decoder_past": decoder_past, - } - self.parent.assertListEqual( - list(result["encoder_output"].size()), [self.batch_size, self.encoder_seq_length, self.hidden_size] - ) - self.parent.assertListEqual( - list(result["decoder_output"].size()), [self.batch_size, self.decoder_seq_length, self.hidden_size] - ) - self.parent.assertEqual(len(decoder_past), 2) - # decoder_past[0] should correspond to encoder output - self.parent.assertTrue(torch.all(decoder_past[0][0] == encoder_output)) - # There should be `num_layers` key value embeddings stored in decoder_past[1] - self.parent.assertEqual(len(decoder_past[1]), config.num_layers) - # There should be a self attn key, a self attn value, a cross attn key and a cross attn value stored in each decoder_past[1] tuple - self.parent.assertEqual(len(decoder_past[1][0]), 4) - - def create_and_check_t5_with_lm_head( - self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, - ): - model = T5ForConditionalGeneration(config=config) - model.to(torch_device) - model.eval() - outputs = model( - input_ids=input_ids, - decoder_input_ids=decoder_input_ids, - decoder_attention_mask=decoder_attention_mask, - labels=lm_labels, - ) - loss, prediction_scores, _, _ = outputs - self.parent.assertEqual(len(outputs), 4) - result = { - "loss": loss, - "prediction_scores": prediction_scores, - } - self.parent.assertListEqual( - list(result["prediction_scores"].size()), [self.batch_size, self.decoder_seq_length, self.vocab_size] - ) - self.check_loss_output(result) - - def create_and_check_t5_decoder_model_past( - self, config, input_ids, 
decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, - ): - model = T5Model(config=config).get_decoder() - model.to(torch_device) - model.eval() - - # first forward pass - output, past_key_value_states = model(input_ids, use_cache=True) - - # create hypothetical next token and extent to next_input_ids - next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) - - # append to next input_ids and - next_input_ids = torch.cat([input_ids, next_tokens], dim=-1) - - output_from_no_past = model(next_input_ids)[0] - output_from_past = model(next_tokens, past_key_value_states=past_key_value_states)[0] - - # select random slice - random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() - output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach() - output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach() - - # test that outputs are equal for slice - self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3)) - - def create_and_check_t5_decoder_model_attention_mask_past( - self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, - ): - model = T5Model(config=config).get_decoder() - model.to(torch_device) - model.eval() - - # create attention mask - attn_mask = torch.ones(input_ids.shape, dtype=torch.long, device=torch_device) - - half_seq_length = input_ids.shape[-1] // 2 - attn_mask[:, half_seq_length:] = 0 - - # first forward pass - output, past_key_value_states = model(input_ids, attention_mask=attn_mask, use_cache=True) - - # create hypothetical next token and extent to next_input_ids - next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) - - # change a random masked slice from input_ids - random_seq_idx_to_change = ids_tensor((1,), half_seq_length).item() + 1 - random_other_next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size).squeeze(-1) - input_ids[:, -random_seq_idx_to_change] = random_other_next_tokens - - # append to next input_ids and attn_mask - next_input_ids = torch.cat([input_ids, next_tokens], dim=-1) - attn_mask = torch.cat( - [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)], dim=1, - ) - - # get two different outputs - output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0] - output_from_past = model( - next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask - )[0] - - # select random slice - random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() - output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach() - output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach() - - # test that outputs are equal for slice - self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3)) - - def create_t5_and_check_t5_generate_with_past_key_value_states( - self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, - ): - model = T5ForConditionalGeneration(config=config) - model.to(torch_device) - model.eval() - torch.manual_seed(0) - output_without_past_cache = model.generate( - input_ids[:1], num_beams=2, max_length=5, do_sample=True, use_cache=False - ) - torch.manual_seed(0) - output_with_past_cache = model.generate(input_ids[:1], num_beams=2, max_length=5, do_sample=True) - self.parent.assertTrue(torch.all(output_with_past_cache == output_without_past_cache)) - - def create_and_check_t5_model_fp16_forward( 
- self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels, - ): - model = T5Model(config=config) - model.to(torch_device) - model.half() - model.eval() - output = model(input_ids, decoder_input_ids=input_ids, attention_mask=attention_mask)[0] - self.parent.assertFalse(torch.isnan(output).any().item()) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - decoder_input_ids, - attention_mask, - decoder_attention_mask, - lm_labels, - ) = config_and_inputs - - inputs_dict = { - "input_ids": input_ids, - "attention_mask": attention_mask, - "decoder_input_ids": decoder_input_ids, - "decoder_attention_mask": decoder_attention_mask, - "use_cache": False, - } - return config, inputs_dict - def setUp(self): - self.model_tester = T5ModelTest.T5ModelTester(self) + self.model_tester = T5ModelTester(self) self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37) def test_config(self): diff --git a/tests/test_modeling_tf_albert.py b/tests/test_modeling_tf_albert.py index 04983ab5ac..cdcf0ffaf3 100644 --- a/tests/test_modeling_tf_albert.py +++ b/tests/test_modeling_tf_albert.py @@ -34,6 +34,186 @@ if is_tf_available(): ) +class TFAlbertModelTester: + def __init__( + self, + parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=True, + use_labels=True, + vocab_size=99, + embedding_size=16, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_input_mask = True + self.use_token_type_ids = True + self.use_labels = True + self.vocab_size = 99 + self.embedding_size = 16 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.intermediate_size = 37 + self.hidden_act = "gelu" + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = AlbertConfig( + vocab_size=self.vocab_size, + hidden_size=self.hidden_size, + num_hidden_layers=self.num_hidden_layers, + num_attention_heads=self.num_attention_heads, + intermediate_size=self.intermediate_size, + hidden_act=self.hidden_act, + hidden_dropout_prob=self.hidden_dropout_prob, + attention_probs_dropout_prob=self.attention_probs_dropout_prob, + 
max_position_embeddings=self.max_position_embeddings, + type_vocab_size=self.type_vocab_size, + initializer_range=self.initializer_range, + ) + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def create_and_check_albert_model( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFAlbertModel(config=config) + # inputs = {'input_ids': input_ids, + # 'attention_mask': input_mask, + # 'token_type_ids': token_type_ids} + # sequence_output, pooled_output = model(**inputs) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + sequence_output, pooled_output = model(inputs) + + inputs = [input_ids, input_mask] + sequence_output, pooled_output = model(inputs) + + sequence_output, pooled_output = model(input_ids) + + result = { + "sequence_output": sequence_output.numpy(), + "pooled_output": pooled_output.numpy(), + } + self.parent.assertListEqual( + list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] + ) + self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size]) + + def create_and_check_albert_for_pretraining( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = TFAlbertForPreTraining(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + prediction_scores, sop_scores = model(inputs) + result = { + "prediction_scores": prediction_scores.numpy(), + "sop_scores": sop_scores.numpy(), + } + self.parent.assertListEqual( + list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + self.parent.assertListEqual(list(result["sop_scores"].shape), [self.batch_size, self.num_labels]) + + def create_and_check_albert_for_masked_lm( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFAlbertForMaskedLM(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + (prediction_scores,) = model(inputs) + result = { + "prediction_scores": prediction_scores.numpy(), + } + self.parent.assertListEqual( + list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + + def create_and_check_albert_for_sequence_classification( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = TFAlbertForSequenceClassification(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + (logits,) = model(inputs) + result = { + "logits": logits.numpy(), + } + self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels]) + + def create_and_check_albert_for_question_answering( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFAlbertForQuestionAnswering(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + start_logits, end_logits = model(inputs) + result = { + "start_logits": start_logits.numpy(), + "end_logits": end_logits.numpy(), + } + self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, 
self.seq_length]) + self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length]) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} + return config, inputs_dict + + @require_tf class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase): @@ -49,187 +229,8 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase): else () ) - class TFAlbertModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - embedding_size=16, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.embedding_size = embedding_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = AlbertConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - initializer_range=self.initializer_range, - ) - - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - - def create_and_check_albert_model( - self, config, input_ids, token_type_ids, 
input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFAlbertModel(config=config) - # inputs = {'input_ids': input_ids, - # 'attention_mask': input_mask, - # 'token_type_ids': token_type_ids} - # sequence_output, pooled_output = model(**inputs) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - sequence_output, pooled_output = model(inputs) - - inputs = [input_ids, input_mask] - sequence_output, pooled_output = model(inputs) - - sequence_output, pooled_output = model(input_ids) - - result = { - "sequence_output": sequence_output.numpy(), - "pooled_output": pooled_output.numpy(), - } - self.parent.assertListEqual( - list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] - ) - self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size]) - - def create_and_check_albert_for_pretraining( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = TFAlbertForPreTraining(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - prediction_scores, sop_scores = model(inputs) - result = { - "prediction_scores": prediction_scores.numpy(), - "sop_scores": sop_scores.numpy(), - } - self.parent.assertListEqual( - list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - self.parent.assertListEqual(list(result["sop_scores"].shape), [self.batch_size, self.num_labels]) - - def create_and_check_albert_for_masked_lm( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFAlbertForMaskedLM(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - (prediction_scores,) = model(inputs) - result = { - "prediction_scores": prediction_scores.numpy(), - } - self.parent.assertListEqual( - list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - - def create_and_check_albert_for_sequence_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = TFAlbertForSequenceClassification(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - (logits,) = model(inputs) - result = { - "logits": logits.numpy(), - } - self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels]) - - def create_and_check_albert_for_question_answering( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFAlbertForQuestionAnswering(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - start_logits, end_logits = model(inputs) - result = { - "start_logits": start_logits.numpy(), - "end_logits": end_logits.numpy(), - } - self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length]) - self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length]) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) 
= config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - def setUp(self): - self.model_tester = TFAlbertModelTest.TFAlbertModelTester(self) + self.model_tester = TFAlbertModelTester(self) self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37) def test_config(self): diff --git a/tests/test_modeling_tf_bert.py b/tests/test_modeling_tf_bert.py index 53b9c72da7..4fab58920b 100644 --- a/tests/test_modeling_tf_bert.py +++ b/tests/test_modeling_tf_bert.py @@ -37,6 +37,221 @@ if is_tf_available(): ) +class TFBertModelTester: + def __init__( + self, + parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=True, + use_labels=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_input_mask = True + self.use_token_type_ids = True + self.use_labels = True + self.vocab_size = 99 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.intermediate_size = 37 + self.hidden_act = "gelu" + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = BertConfig( + vocab_size=self.vocab_size, + hidden_size=self.hidden_size, + num_hidden_layers=self.num_hidden_layers, + num_attention_heads=self.num_attention_heads, + intermediate_size=self.intermediate_size, + hidden_act=self.hidden_act, + hidden_dropout_prob=self.hidden_dropout_prob, + attention_probs_dropout_prob=self.attention_probs_dropout_prob, + max_position_embeddings=self.max_position_embeddings, + type_vocab_size=self.type_vocab_size, + initializer_range=self.initializer_range, + ) + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def create_and_check_bert_model( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFBertModel(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + sequence_output, pooled_output = model(inputs) + + inputs = [input_ids, input_mask] + sequence_output, pooled_output = model(inputs) + + sequence_output, pooled_output = 
model(input_ids) + + result = { + "sequence_output": sequence_output.numpy(), + "pooled_output": pooled_output.numpy(), + } + self.parent.assertListEqual( + list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] + ) + self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size]) + + def create_and_check_bert_for_masked_lm( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFBertForMaskedLM(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + (prediction_scores,) = model(inputs) + result = { + "prediction_scores": prediction_scores.numpy(), + } + self.parent.assertListEqual( + list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + + def create_and_check_bert_for_next_sequence_prediction( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFBertForNextSentencePrediction(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + (seq_relationship_score,) = model(inputs) + result = { + "seq_relationship_score": seq_relationship_score.numpy(), + } + self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2]) + + def create_and_check_bert_for_pretraining( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFBertForPreTraining(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + prediction_scores, seq_relationship_score = model(inputs) + result = { + "prediction_scores": prediction_scores.numpy(), + "seq_relationship_score": seq_relationship_score.numpy(), + } + self.parent.assertListEqual( + list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2]) + + def create_and_check_bert_for_sequence_classification( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = TFBertForSequenceClassification(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + (logits,) = model(inputs) + result = { + "logits": logits.numpy(), + } + self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels]) + + def create_and_check_bert_for_multiple_choice( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_choices = self.num_choices + model = TFBertForMultipleChoice(config=config) + multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) + multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) + multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) + inputs = { + "input_ids": multiple_choice_inputs_ids, + "attention_mask": multiple_choice_input_mask, + "token_type_ids": multiple_choice_token_type_ids, + } + (logits,) = model(inputs) + result = { + "logits": logits.numpy(), + } + self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices]) + + def 
create_and_check_bert_for_token_classification( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = TFBertForTokenClassification(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + (logits,) = model(inputs) + result = { + "logits": logits.numpy(), + } + self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]) + + def create_and_check_bert_for_question_answering( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFBertForQuestionAnswering(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + start_logits, end_logits = model(inputs) + result = { + "start_logits": start_logits.numpy(), + "end_logits": end_logits.numpy(), + } + self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length]) + self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length]) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} + return config, inputs_dict + + @require_tf class TFBertModelTest(TFModelTesterMixin, unittest.TestCase): @@ -55,224 +270,8 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase): else () ) - class TFBertModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = 
None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = BertConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - initializer_range=self.initializer_range, - ) - - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - - def create_and_check_bert_model( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFBertModel(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - sequence_output, pooled_output = model(inputs) - - inputs = [input_ids, input_mask] - sequence_output, pooled_output = model(inputs) - - sequence_output, pooled_output = model(input_ids) - - result = { - "sequence_output": sequence_output.numpy(), - "pooled_output": pooled_output.numpy(), - } - self.parent.assertListEqual( - list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] - ) - self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size]) - - def create_and_check_bert_for_masked_lm( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFBertForMaskedLM(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - (prediction_scores,) = model(inputs) - result = { - "prediction_scores": prediction_scores.numpy(), - } - self.parent.assertListEqual( - list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - - def create_and_check_bert_for_next_sequence_prediction( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFBertForNextSentencePrediction(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - (seq_relationship_score,) = model(inputs) - result = { - "seq_relationship_score": seq_relationship_score.numpy(), - } - self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2]) - - def create_and_check_bert_for_pretraining( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFBertForPreTraining(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - prediction_scores, seq_relationship_score = model(inputs) - result = { - "prediction_scores": prediction_scores.numpy(), - "seq_relationship_score": seq_relationship_score.numpy(), - } - self.parent.assertListEqual( - list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2]) - - def 
create_and_check_bert_for_sequence_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = TFBertForSequenceClassification(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - (logits,) = model(inputs) - result = { - "logits": logits.numpy(), - } - self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels]) - - def create_and_check_bert_for_multiple_choice( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_choices = self.num_choices - model = TFBertForMultipleChoice(config=config) - multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) - multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) - multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) - inputs = { - "input_ids": multiple_choice_inputs_ids, - "attention_mask": multiple_choice_input_mask, - "token_type_ids": multiple_choice_token_type_ids, - } - (logits,) = model(inputs) - result = { - "logits": logits.numpy(), - } - self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices]) - - def create_and_check_bert_for_token_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = TFBertForTokenClassification(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - (logits,) = model(inputs) - result = { - "logits": logits.numpy(), - } - self.parent.assertListEqual( - list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels] - ) - - def create_and_check_bert_for_question_answering( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFBertForQuestionAnswering(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - start_logits, end_logits = model(inputs) - result = { - "start_logits": start_logits.numpy(), - "end_logits": end_logits.numpy(), - } - self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length]) - self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length]) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - def setUp(self): - self.model_tester = TFBertModelTest.TFBertModelTester(self) + self.model_tester = TFBertModelTester(self) self.config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37) def test_config(self): diff --git a/tests/test_modeling_tf_ctrl.py b/tests/test_modeling_tf_ctrl.py index e375800396..789dc65ec4 100644 --- a/tests/test_modeling_tf_ctrl.py +++ b/tests/test_modeling_tf_ctrl.py @@ -28,163 +28,141 @@ if is_tf_available(): from transformers.modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel, TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST +class 
TFCTRLModelTester(object): + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_token_type_ids = True + self.use_input_mask = True + self.use_labels = True + self.use_mc_token_ids = True + self.vocab_size = 99 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.intermediate_size = 37 + self.hidden_act = "gelu" + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + mc_token_ids = None + if self.use_mc_token_ids: + mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = CTRLConfig( + vocab_size=self.vocab_size, + n_embd=self.hidden_size, + n_layer=self.num_hidden_layers, + n_head=self.num_attention_heads, + # intermediate_size=self.intermediate_size, + # hidden_act=self.hidden_act, + # hidden_dropout_prob=self.hidden_dropout_prob, + # attention_probs_dropout_prob=self.attention_probs_dropout_prob, + n_positions=self.max_position_embeddings, + n_ctx=self.max_position_embeddings + # type_vocab_size=self.type_vocab_size, + # initializer_range=self.initializer_range + ) + + head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) + + return ( + config, + input_ids, + input_mask, + head_mask, + token_type_ids, + mc_token_ids, + sequence_labels, + token_labels, + choice_labels, + ) + + def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): + model = TFCTRLModel(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + sequence_output = model(inputs)[0] + + inputs = [input_ids, None, input_mask] # None is the input for 'past' + sequence_output = model(inputs)[0] + + sequence_output = model(input_ids)[0] + + result = { + "sequence_output": sequence_output.numpy(), + } + self.parent.assertListEqual( + list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] + ) + + def create_and_check_ctrl_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): + model = TFCTRLLMHeadModel(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + prediction_scores = model(inputs)[0] + result = { + "prediction_scores": prediction_scores.numpy(), + } + self.parent.assertListEqual( + list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + + ( + config, + 
input_ids, + input_mask, + head_mask, + token_type_ids, + mc_token_ids, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} + return config, inputs_dict + + @require_tf class TFCTRLModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (TFCTRLModel, TFCTRLLMHeadModel) if is_tf_available() else () all_generative_model_classes = (TFCTRLLMHeadModel,) if is_tf_available() else () - class TFCTRLModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_token_type_ids=True, - use_input_mask=True, - use_labels=True, - use_mc_token_ids=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_token_type_ids = use_token_type_ids - self.use_input_mask = use_input_mask - self.use_labels = use_labels - self.use_mc_token_ids = use_mc_token_ids - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - mc_token_ids = None - if self.use_mc_token_ids: - mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = CTRLConfig( - vocab_size=self.vocab_size, - n_embd=self.hidden_size, - n_layer=self.num_hidden_layers, - n_head=self.num_attention_heads, - # intermediate_size=self.intermediate_size, - # hidden_act=self.hidden_act, - # hidden_dropout_prob=self.hidden_dropout_prob, - # attention_probs_dropout_prob=self.attention_probs_dropout_prob, - n_positions=self.max_position_embeddings, - n_ctx=self.max_position_embeddings - # type_vocab_size=self.type_vocab_size, - # initializer_range=self.initializer_range - ) - - head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) - - return ( - config, - input_ids, - input_mask, - head_mask, - token_type_ids, - mc_token_ids, - 
sequence_labels, - token_labels, - choice_labels, - ) - - def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): - model = TFCTRLModel(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - sequence_output = model(inputs)[0] - - inputs = [input_ids, None, input_mask] # None is the input for 'past' - sequence_output = model(inputs)[0] - - sequence_output = model(input_ids)[0] - - result = { - "sequence_output": sequence_output.numpy(), - } - self.parent.assertListEqual( - list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] - ) - - def create_and_check_ctrl_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): - model = TFCTRLLMHeadModel(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - prediction_scores = model(inputs)[0] - result = { - "prediction_scores": prediction_scores.numpy(), - } - self.parent.assertListEqual( - list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - - ( - config, - input_ids, - input_mask, - head_mask, - token_type_ids, - mc_token_ids, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - def setUp(self): - self.model_tester = TFCTRLModelTest.TFCTRLModelTester(self) + self.model_tester = TFCTRLModelTester(self) self.config_tester = ConfigTester(self, config_class=CTRLConfig, n_embd=37) def test_config(self): diff --git a/tests/test_modeling_tf_distilbert.py b/tests/test_modeling_tf_distilbert.py index c513885f29..c4fb2f103f 100644 --- a/tests/test_modeling_tf_distilbert.py +++ b/tests/test_modeling_tf_distilbert.py @@ -32,6 +32,128 @@ if is_tf_available(): ) +class TFDistilBertModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_input_mask = True + self.use_token_type_ids = False + self.use_labels = True + self.vocab_size = 99 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.intermediate_size = 37 + self.hidden_act = "gelu" + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = DistilBertConfig( + vocab_size=self.vocab_size, + dim=self.hidden_size, + n_layers=self.num_hidden_layers, + n_heads=self.num_attention_heads, + hidden_dim=self.intermediate_size, + hidden_act=self.hidden_act, + 
dropout=self.hidden_dropout_prob, + attention_dropout=self.attention_probs_dropout_prob, + max_position_embeddings=self.max_position_embeddings, + initializer_range=self.initializer_range, + ) + + return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + + def create_and_check_distilbert_model( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFDistilBertModel(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask} + + outputs = model(inputs) + sequence_output = outputs[0] + + inputs = [input_ids, input_mask] + + (sequence_output,) = model(inputs) + + result = { + "sequence_output": sequence_output.numpy(), + } + self.parent.assertListEqual( + list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] + ) + + def create_and_check_distilbert_for_masked_lm( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFDistilBertForMaskedLM(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask} + (prediction_scores,) = model(inputs) + result = { + "prediction_scores": prediction_scores.numpy(), + } + self.parent.assertListEqual( + list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + + def create_and_check_distilbert_for_question_answering( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFDistilBertForQuestionAnswering(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask} + start_logits, end_logits = model(inputs) + result = { + "start_logits": start_logits.numpy(), + "end_logits": end_logits.numpy(), + } + self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length]) + self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length]) + + def create_and_check_distilbert_for_sequence_classification( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = TFDistilBertForSequenceClassification(config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask} + (logits,) = model(inputs) + result = { + "logits": logits.numpy(), + } + self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels]) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + (config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask} + return config, inputs_dict + + @require_tf class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase): @@ -50,151 +172,8 @@ class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase): test_resize_embeddings = True test_head_masking = True - class TFDistilBertModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=False, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - 
self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = DistilBertConfig( - vocab_size=self.vocab_size, - dim=self.hidden_size, - n_layers=self.num_hidden_layers, - n_heads=self.num_attention_heads, - hidden_dim=self.intermediate_size, - hidden_act=self.hidden_act, - dropout=self.hidden_dropout_prob, - attention_dropout=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - initializer_range=self.initializer_range, - ) - - return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - - def create_and_check_distilbert_model( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFDistilBertModel(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask} - - outputs = model(inputs) - sequence_output = outputs[0] - - inputs = [input_ids, input_mask] - - (sequence_output,) = model(inputs) - - result = { - "sequence_output": sequence_output.numpy(), - } - self.parent.assertListEqual( - list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] - ) - - def create_and_check_distilbert_for_masked_lm( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFDistilBertForMaskedLM(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask} - (prediction_scores,) = model(inputs) - result = { - "prediction_scores": prediction_scores.numpy(), - } - self.parent.assertListEqual( - list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - - def create_and_check_distilbert_for_question_answering( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFDistilBertForQuestionAnswering(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask} - start_logits, end_logits = model(inputs) - result = { - "start_logits": start_logits.numpy(), - "end_logits": end_logits.numpy(), - } - self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length]) - self.parent.assertListEqual(list(result["end_logits"].shape), 
[self.batch_size, self.seq_length]) - - def create_and_check_distilbert_for_sequence_classification( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = TFDistilBertForSequenceClassification(config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask} - (logits,) = model(inputs) - result = { - "logits": logits.numpy(), - } - self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels]) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - (config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask} - return config, inputs_dict - def setUp(self): - self.model_tester = TFDistilBertModelTest.TFDistilBertModelTester(self) + self.model_tester = TFDistilBertModelTester(self) self.config_tester = ConfigTester(self, config_class=DistilBertConfig, dim=37) def test_config(self): diff --git a/tests/test_modeling_tf_electra.py b/tests/test_modeling_tf_electra.py index 3f79811b48..977b8f9b6c 100644 --- a/tests/test_modeling_tf_electra.py +++ b/tests/test_modeling_tf_electra.py @@ -32,6 +32,138 @@ if is_tf_available(): ) +class TFElectraModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_input_mask = True + self.use_token_type_ids = True + self.use_labels = True + self.vocab_size = 99 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.intermediate_size = 37 + self.hidden_act = "gelu" + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = ElectraConfig( + vocab_size=self.vocab_size, + hidden_size=self.hidden_size, + num_hidden_layers=self.num_hidden_layers, + num_attention_heads=self.num_attention_heads, + intermediate_size=self.intermediate_size, + hidden_act=self.hidden_act, + hidden_dropout_prob=self.hidden_dropout_prob, + attention_probs_dropout_prob=self.attention_probs_dropout_prob, + max_position_embeddings=self.max_position_embeddings, + type_vocab_size=self.type_vocab_size, + initializer_range=self.initializer_range, + ) + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def create_and_check_electra_model( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFElectraModel(config=config) + inputs = {"input_ids": input_ids, 
"attention_mask": input_mask, "token_type_ids": token_type_ids} + (sequence_output,) = model(inputs) + + inputs = [input_ids, input_mask] + (sequence_output,) = model(inputs) + + (sequence_output,) = model(input_ids) + + result = { + "sequence_output": sequence_output.numpy(), + } + self.parent.assertListEqual( + list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] + ) + + def create_and_check_electra_for_masked_lm( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFElectraForMaskedLM(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + (prediction_scores,) = model(inputs) + result = { + "prediction_scores": prediction_scores.numpy(), + } + self.parent.assertListEqual( + list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + + def create_and_check_electra_for_pretraining( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFElectraForPreTraining(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + (prediction_scores,) = model(inputs) + result = { + "prediction_scores": prediction_scores.numpy(), + } + self.parent.assertListEqual(list(result["prediction_scores"].shape), [self.batch_size, self.seq_length]) + + def create_and_check_electra_for_token_classification( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = TFElectraForTokenClassification(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + (logits,) = model(inputs) + result = { + "logits": logits.numpy(), + } + self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} + return config, inputs_dict + + @require_tf class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase): @@ -41,163 +173,8 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase): else () ) - class TFElectraModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - 
self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = ElectraConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - initializer_range=self.initializer_range, - ) - - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - - def create_and_check_electra_model( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFElectraModel(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - (sequence_output,) = model(inputs) - - inputs = [input_ids, input_mask] - (sequence_output,) = model(inputs) - - (sequence_output,) = model(input_ids) - - result = { - "sequence_output": sequence_output.numpy(), - } - self.parent.assertListEqual( - list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] - ) - - def create_and_check_electra_for_masked_lm( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFElectraForMaskedLM(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - (prediction_scores,) = model(inputs) - result = { - "prediction_scores": prediction_scores.numpy(), - } - self.parent.assertListEqual( - list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - - def create_and_check_electra_for_pretraining( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFElectraForPreTraining(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - (prediction_scores,) = model(inputs) - result = { - "prediction_scores": prediction_scores.numpy(), - } - self.parent.assertListEqual(list(result["prediction_scores"].shape), [self.batch_size, self.seq_length]) - - def create_and_check_electra_for_token_classification( - self, config, input_ids, token_type_ids, 
input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = TFElectraForTokenClassification(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - (logits,) = model(inputs) - result = { - "logits": logits.numpy(), - } - self.parent.assertListEqual( - list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels] - ) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - def setUp(self): - self.model_tester = TFElectraModelTest.TFElectraModelTester(self) + self.model_tester = TFElectraModelTester(self) self.config_tester = ConfigTester(self, config_class=ElectraConfig, hidden_size=37) def test_config(self): diff --git a/tests/test_modeling_tf_gpt2.py b/tests/test_modeling_tf_gpt2.py index 8177789a2e..6afd2959f4 100644 --- a/tests/test_modeling_tf_gpt2.py +++ b/tests/test_modeling_tf_gpt2.py @@ -34,268 +34,246 @@ if is_tf_available(): ) +class TFGPT2ModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_token_type_ids = True + self.use_input_mask = True + self.use_labels = True + self.use_mc_token_ids = True + self.vocab_size = 99 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.intermediate_size = 37 + self.hidden_act = "gelu" + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + self.bos_token_id = self.vocab_size - 1 + self.eos_token_id = self.vocab_size - 1 + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + mc_token_ids = None + if self.use_mc_token_ids: + mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = GPT2Config( + vocab_size=self.vocab_size, + n_embd=self.hidden_size, + n_layer=self.num_hidden_layers, + n_head=self.num_attention_heads, + # intermediate_size=self.intermediate_size, + # hidden_act=self.hidden_act, + # hidden_dropout_prob=self.hidden_dropout_prob, + # attention_probs_dropout_prob=self.attention_probs_dropout_prob, + n_positions=self.max_position_embeddings, + n_ctx=self.max_position_embeddings, + # type_vocab_size=self.type_vocab_size, + # initializer_range=self.initializer_range + bos_token_id=self.bos_token_id, + eos_token_id=self.eos_token_id, + ) + + head_mask = 
ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) + + return ( + config, + input_ids, + input_mask, + head_mask, + token_type_ids, + mc_token_ids, + sequence_labels, + token_labels, + choice_labels, + ) + + def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): + model = TFGPT2Model(config=config) + inputs = { + "input_ids": input_ids, + "attention_mask": input_mask, + "token_type_ids": token_type_ids, + } + sequence_output = model(inputs)[0] + + inputs = [input_ids, None, input_mask] # None is the input for 'past' + sequence_output = model(inputs)[0] + + sequence_output = model(input_ids)[0] + + result = { + "sequence_output": sequence_output.numpy(), + } + self.parent.assertListEqual( + list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size], + ) + + def create_and_check_gpt2_model_past(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): + model = TFGPT2Model(config=config) + + # first forward pass + output, past = model(input_ids, token_type_ids=token_type_ids) + + # create hypothetical next token and extent to next_input_ids + next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) + next_token_types = ids_tensor([self.batch_size, 1], self.type_vocab_size) + + # append to next input_ids and token_type_ids + next_input_ids = tf.concat([input_ids, next_tokens], axis=-1) + next_token_type_ids = tf.concat([token_type_ids, next_token_types], axis=-1) + + output_from_no_past, _ = model(next_input_ids, token_type_ids=next_token_type_ids) + output_from_past, _ = model(next_tokens, token_type_ids=next_token_types, past=past) + + # select random slice + random_slice_idx = int(ids_tensor((1,), shape_list(output_from_past)[-1])) + output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx] + output_from_past_slice = output_from_past[:, 0, random_slice_idx] + + # test that outputs are equal for slice + tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-6) + + def create_and_check_gpt2_model_attention_mask_past( + self, config, input_ids, input_mask, head_mask, token_type_ids, *args + ): + model = TFGPT2Model(config=config) + + # create attention mask + half_seq_length = self.seq_length // 2 + attn_mask_begin = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32) + attn_mask_end = tf.zeros((self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32) + attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1) + + # first forward pass + output, past = model(input_ids, attention_mask=attn_mask) + + # create hypothetical next token and extent to next_input_ids + next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) + + # change a random masked slice from input_ids + random_seq_idx_to_change = ids_tensor((1,), half_seq_length).numpy() + 1 + random_other_next_tokens = ids_tensor((self.batch_size, self.seq_length), config.vocab_size) + vector_condition = tf.range(self.seq_length) == (self.seq_length - random_seq_idx_to_change) + condition = tf.transpose( + tf.broadcast_to(tf.expand_dims(vector_condition, -1), (self.seq_length, self.batch_size)) + ) + input_ids = tf.where(condition, random_other_next_tokens, input_ids) + + # append to next input_ids and attn_mask + next_input_ids = tf.concat([input_ids, next_tokens], axis=-1) + attn_mask = tf.concat([attn_mask, tf.ones((shape_list(attn_mask)[0], 1), dtype=tf.int32)], axis=1) + + # get two different outputs + output_from_no_past, _ = 
model(next_input_ids, attention_mask=attn_mask) + output_from_past, _ = model(next_tokens, past=past, attention_mask=attn_mask) + + # select random slice + random_slice_idx = int(ids_tensor((1,), shape_list(output_from_past)[-1])) + output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx] + output_from_past_slice = output_from_past[:, 0, random_slice_idx] + + # test that outputs are equal for slice + tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-12) + + def create_and_check_gpt2_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): + model = TFGPT2LMHeadModel(config=config) + inputs = { + "input_ids": input_ids, + "attention_mask": input_mask, + "token_type_ids": token_type_ids, + } + prediction_scores = model(inputs)[0] + result = { + "prediction_scores": prediction_scores.numpy(), + } + self.parent.assertListEqual( + list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size], + ) + + def create_and_check_gpt2_double_head( + self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args + ): + model = TFGPT2DoubleHeadsModel(config=config) + + multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) + multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) + multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) + + inputs = { + "input_ids": multiple_choice_inputs_ids, + "mc_token_ids": mc_token_ids, + "attention_mask": multiple_choice_input_mask, + "token_type_ids": multiple_choice_token_type_ids, + } + lm_logits, mc_logits = model(inputs)[:2] + result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()} + self.parent.assertListEqual( + list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size], + ) + self.parent.assertListEqual(list(result["mc_logits"].shape), [self.batch_size, self.num_choices]) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + + ( + config, + input_ids, + input_mask, + head_mask, + token_type_ids, + mc_token_ids, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + + inputs_dict = { + "input_ids": input_ids, + "token_type_ids": token_type_ids, + "attention_mask": input_mask, + } + return config, inputs_dict + + @require_tf class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel) if is_tf_available() else () all_generative_model_classes = (TFGPT2LMHeadModel,) if is_tf_available() else () - class TFGPT2ModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_token_type_ids=True, - use_input_mask=True, - use_labels=True, - use_mc_token_ids=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_token_type_ids = use_token_type_ids - self.use_input_mask = use_input_mask - self.use_labels = use_labels - self.use_mc_token_ids 
= use_mc_token_ids - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - self.bos_token_id = vocab_size - 1 - self.eos_token_id = vocab_size - 1 - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - mc_token_ids = None - if self.use_mc_token_ids: - mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = GPT2Config( - vocab_size=self.vocab_size, - n_embd=self.hidden_size, - n_layer=self.num_hidden_layers, - n_head=self.num_attention_heads, - # intermediate_size=self.intermediate_size, - # hidden_act=self.hidden_act, - # hidden_dropout_prob=self.hidden_dropout_prob, - # attention_probs_dropout_prob=self.attention_probs_dropout_prob, - n_positions=self.max_position_embeddings, - n_ctx=self.max_position_embeddings, - # type_vocab_size=self.type_vocab_size, - # initializer_range=self.initializer_range - bos_token_id=self.bos_token_id, - eos_token_id=self.eos_token_id, - ) - - head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) - - return ( - config, - input_ids, - input_mask, - head_mask, - token_type_ids, - mc_token_ids, - sequence_labels, - token_labels, - choice_labels, - ) - - def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): - model = TFGPT2Model(config=config) - inputs = { - "input_ids": input_ids, - "attention_mask": input_mask, - "token_type_ids": token_type_ids, - } - sequence_output = model(inputs)[0] - - inputs = [input_ids, None, input_mask] # None is the input for 'past' - sequence_output = model(inputs)[0] - - sequence_output = model(input_ids)[0] - - result = { - "sequence_output": sequence_output.numpy(), - } - self.parent.assertListEqual( - list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size], - ) - - def create_and_check_gpt2_model_past(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): - model = TFGPT2Model(config=config) - - # first forward pass - output, past = model(input_ids, token_type_ids=token_type_ids) - - # create hypothetical next token and extent to next_input_ids - next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) - next_token_types = ids_tensor([self.batch_size, 1], self.type_vocab_size) - - # append to next input_ids and token_type_ids - next_input_ids 
= tf.concat([input_ids, next_tokens], axis=-1) - next_token_type_ids = tf.concat([token_type_ids, next_token_types], axis=-1) - - output_from_no_past, _ = model(next_input_ids, token_type_ids=next_token_type_ids) - output_from_past, _ = model(next_tokens, token_type_ids=next_token_types, past=past) - - # select random slice - random_slice_idx = int(ids_tensor((1,), shape_list(output_from_past)[-1])) - output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx] - output_from_past_slice = output_from_past[:, 0, random_slice_idx] - - # test that outputs are equal for slice - tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-6) - - def create_and_check_gpt2_model_attention_mask_past( - self, config, input_ids, input_mask, head_mask, token_type_ids, *args - ): - model = TFGPT2Model(config=config) - - # create attention mask - half_seq_length = self.seq_length // 2 - attn_mask_begin = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32) - attn_mask_end = tf.zeros((self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32) - attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1) - - # first forward pass - output, past = model(input_ids, attention_mask=attn_mask) - - # create hypothetical next token and extent to next_input_ids - next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) - - # change a random masked slice from input_ids - random_seq_idx_to_change = ids_tensor((1,), half_seq_length).numpy() + 1 - random_other_next_tokens = ids_tensor((self.batch_size, self.seq_length), config.vocab_size) - vector_condition = tf.range(self.seq_length) == (self.seq_length - random_seq_idx_to_change) - condition = tf.transpose( - tf.broadcast_to(tf.expand_dims(vector_condition, -1), (self.seq_length, self.batch_size)) - ) - input_ids = tf.where(condition, random_other_next_tokens, input_ids) - - # append to next input_ids and attn_mask - next_input_ids = tf.concat([input_ids, next_tokens], axis=-1) - attn_mask = tf.concat([attn_mask, tf.ones((shape_list(attn_mask)[0], 1), dtype=tf.int32)], axis=1) - - # get two different outputs - output_from_no_past, _ = model(next_input_ids, attention_mask=attn_mask) - output_from_past, _ = model(next_tokens, past=past, attention_mask=attn_mask) - - # select random slice - random_slice_idx = int(ids_tensor((1,), shape_list(output_from_past)[-1])) - output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx] - output_from_past_slice = output_from_past[:, 0, random_slice_idx] - - # test that outputs are equal for slice - tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-12) - - def create_and_check_gpt2_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): - model = TFGPT2LMHeadModel(config=config) - inputs = { - "input_ids": input_ids, - "attention_mask": input_mask, - "token_type_ids": token_type_ids, - } - prediction_scores = model(inputs)[0] - result = { - "prediction_scores": prediction_scores.numpy(), - } - self.parent.assertListEqual( - list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size], - ) - - def create_and_check_gpt2_double_head( - self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args - ): - model = TFGPT2DoubleHeadsModel(config=config) - - multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) - multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) - 
multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) - - inputs = { - "input_ids": multiple_choice_inputs_ids, - "mc_token_ids": mc_token_ids, - "attention_mask": multiple_choice_input_mask, - "token_type_ids": multiple_choice_token_type_ids, - } - lm_logits, mc_logits = model(inputs)[:2] - result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()} - self.parent.assertListEqual( - list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size], - ) - self.parent.assertListEqual(list(result["mc_logits"].shape), [self.batch_size, self.num_choices]) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - - ( - config, - input_ids, - input_mask, - head_mask, - token_type_ids, - mc_token_ids, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - - inputs_dict = { - "input_ids": input_ids, - "token_type_ids": token_type_ids, - "attention_mask": input_mask, - } - return config, inputs_dict - def setUp(self): - self.model_tester = TFGPT2ModelTest.TFGPT2ModelTester(self) + self.model_tester = TFGPT2ModelTester(self) self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37) def test_config(self): diff --git a/tests/test_modeling_tf_openai_gpt.py b/tests/test_modeling_tf_openai_gpt.py index a7573082cd..5baf108c58 100644 --- a/tests/test_modeling_tf_openai_gpt.py +++ b/tests/test_modeling_tf_openai_gpt.py @@ -33,6 +33,155 @@ if is_tf_available(): ) +class TFOpenAIGPTModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_token_type_ids = True + self.use_input_mask = True + self.use_labels = True + self.use_mc_token_ids = True + self.vocab_size = 99 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.intermediate_size = 37 + self.hidden_act = "gelu" + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + mc_token_ids = None + if self.use_mc_token_ids: + mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = OpenAIGPTConfig( + vocab_size=self.vocab_size, + n_embd=self.hidden_size, + n_layer=self.num_hidden_layers, + n_head=self.num_attention_heads, + # intermediate_size=self.intermediate_size, + # hidden_act=self.hidden_act, + # hidden_dropout_prob=self.hidden_dropout_prob, + # attention_probs_dropout_prob=self.attention_probs_dropout_prob, + n_positions=self.max_position_embeddings, + n_ctx=self.max_position_embeddings + # 
type_vocab_size=self.type_vocab_size, + # initializer_range=self.initializer_range + ) + + head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) + + return ( + config, + input_ids, + input_mask, + head_mask, + token_type_ids, + mc_token_ids, + sequence_labels, + token_labels, + choice_labels, + ) + + def create_and_check_openai_gpt_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): + model = TFOpenAIGPTModel(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + sequence_output = model(inputs)[0] + + inputs = [input_ids, input_mask] + sequence_output = model(inputs)[0] + + sequence_output = model(input_ids)[0] + + result = { + "sequence_output": sequence_output.numpy(), + } + self.parent.assertListEqual( + list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] + ) + + def create_and_check_openai_gpt_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): + model = TFOpenAIGPTLMHeadModel(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + prediction_scores = model(inputs)[0] + result = { + "prediction_scores": prediction_scores.numpy(), + } + self.parent.assertListEqual( + list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + + def create_and_check_openai_gpt_double_head( + self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args + ): + model = TFOpenAIGPTDoubleHeadsModel(config=config) + + multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) + multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) + multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) + + inputs = { + "input_ids": multiple_choice_inputs_ids, + "mc_token_ids": mc_token_ids, + "attention_mask": multiple_choice_input_mask, + "token_type_ids": multiple_choice_token_type_ids, + } + lm_logits, mc_logits = model(inputs)[:2] + result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()} + self.parent.assertListEqual( + list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size] + ) + self.parent.assertListEqual(list(result["mc_logits"].shape), [self.batch_size, self.num_choices]) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + + ( + config, + input_ids, + input_mask, + head_mask, + token_type_ids, + mc_token_ids, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} + return config, inputs_dict + + @require_tf class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase): @@ -43,179 +192,8 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase): (TFOpenAIGPTLMHeadModel,) if is_tf_available() else () ) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly - class TFOpenAIGPTModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_token_type_ids=True, - use_input_mask=True, - use_labels=True, - use_mc_token_ids=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - 
hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_token_type_ids = use_token_type_ids - self.use_input_mask = use_input_mask - self.use_labels = use_labels - self.use_mc_token_ids = use_mc_token_ids - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - mc_token_ids = None - if self.use_mc_token_ids: - mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = OpenAIGPTConfig( - vocab_size=self.vocab_size, - n_embd=self.hidden_size, - n_layer=self.num_hidden_layers, - n_head=self.num_attention_heads, - # intermediate_size=self.intermediate_size, - # hidden_act=self.hidden_act, - # hidden_dropout_prob=self.hidden_dropout_prob, - # attention_probs_dropout_prob=self.attention_probs_dropout_prob, - n_positions=self.max_position_embeddings, - n_ctx=self.max_position_embeddings - # type_vocab_size=self.type_vocab_size, - # initializer_range=self.initializer_range - ) - - head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) - - return ( - config, - input_ids, - input_mask, - head_mask, - token_type_ids, - mc_token_ids, - sequence_labels, - token_labels, - choice_labels, - ) - - def create_and_check_openai_gpt_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): - model = TFOpenAIGPTModel(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - sequence_output = model(inputs)[0] - - inputs = [input_ids, input_mask] - sequence_output = model(inputs)[0] - - sequence_output = model(input_ids)[0] - - result = { - "sequence_output": sequence_output.numpy(), - } - self.parent.assertListEqual( - list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] - ) - - def create_and_check_openai_gpt_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): - model = TFOpenAIGPTLMHeadModel(config=config) - inputs = {"input_ids": input_ids, 
"attention_mask": input_mask, "token_type_ids": token_type_ids} - prediction_scores = model(inputs)[0] - result = { - "prediction_scores": prediction_scores.numpy(), - } - self.parent.assertListEqual( - list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - - def create_and_check_openai_gpt_double_head( - self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args - ): - model = TFOpenAIGPTDoubleHeadsModel(config=config) - - multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) - multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) - multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) - - inputs = { - "input_ids": multiple_choice_inputs_ids, - "mc_token_ids": mc_token_ids, - "attention_mask": multiple_choice_input_mask, - "token_type_ids": multiple_choice_token_type_ids, - } - lm_logits, mc_logits = model(inputs)[:2] - result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()} - self.parent.assertListEqual( - list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size] - ) - self.parent.assertListEqual(list(result["mc_logits"].shape), [self.batch_size, self.num_choices]) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - - ( - config, - input_ids, - input_mask, - head_mask, - token_type_ids, - mc_token_ids, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - def setUp(self): - self.model_tester = TFOpenAIGPTModelTest.TFOpenAIGPTModelTester(self) + self.model_tester = TFOpenAIGPTModelTester(self) self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37) def test_config(self): diff --git a/tests/test_modeling_tf_roberta.py b/tests/test_modeling_tf_roberta.py index a7d2603809..52d1ad3d7a 100644 --- a/tests/test_modeling_tf_roberta.py +++ b/tests/test_modeling_tf_roberta.py @@ -36,6 +36,139 @@ if is_tf_available(): ) +class TFRobertaModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_input_mask = True + self.use_token_type_ids = True + self.use_labels = True + self.vocab_size = 99 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.intermediate_size = 37 + self.hidden_act = "gelu" + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], 
self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = RobertaConfig( + vocab_size=self.vocab_size, + hidden_size=self.hidden_size, + num_hidden_layers=self.num_hidden_layers, + num_attention_heads=self.num_attention_heads, + intermediate_size=self.intermediate_size, + hidden_act=self.hidden_act, + hidden_dropout_prob=self.hidden_dropout_prob, + attention_probs_dropout_prob=self.attention_probs_dropout_prob, + max_position_embeddings=self.max_position_embeddings, + type_vocab_size=self.type_vocab_size, + initializer_range=self.initializer_range, + ) + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def create_and_check_roberta_model( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFRobertaModel(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + sequence_output = model(inputs)[0] + + inputs = [input_ids, input_mask] + sequence_output = model(inputs)[0] + + sequence_output = model(input_ids)[0] + + result = { + "sequence_output": sequence_output.numpy(), + } + self.parent.assertListEqual( + list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] + ) + + def create_and_check_roberta_for_masked_lm( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFRobertaForMaskedLM(config=config) + prediction_scores = model([input_ids, input_mask, token_type_ids])[0] + result = { + "prediction_scores": prediction_scores.numpy(), + } + self.parent.assertListEqual( + list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + + def create_and_check_roberta_for_token_classification( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = TFRobertaForTokenClassification(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + (logits,) = model(inputs) + result = { + "logits": logits.numpy(), + } + self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]) + + def create_and_check_roberta_for_question_answering( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = TFRobertaForQuestionAnswering(config=config) + inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} + start_logits, end_logits = model(inputs) + result = { + "start_logits": start_logits.numpy(), + "end_logits": end_logits.numpy(), + } + self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length]) + self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length]) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} + return config, inputs_dict + + @require_tf class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase): @@ -51,164 +184,8 @@ class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase): else () ) - 
class TFRobertaModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = RobertaConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - initializer_range=self.initializer_range, - ) - - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - - def create_and_check_roberta_model( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFRobertaModel(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - sequence_output = model(inputs)[0] - - inputs = [input_ids, input_mask] - sequence_output = model(inputs)[0] - - sequence_output = model(input_ids)[0] - - result = { - "sequence_output": sequence_output.numpy(), - } - self.parent.assertListEqual( - list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] - ) - - def create_and_check_roberta_for_masked_lm( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = 
TFRobertaForMaskedLM(config=config) - prediction_scores = model([input_ids, input_mask, token_type_ids])[0] - result = { - "prediction_scores": prediction_scores.numpy(), - } - self.parent.assertListEqual( - list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - - def create_and_check_roberta_for_token_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = TFRobertaForTokenClassification(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - (logits,) = model(inputs) - result = { - "logits": logits.numpy(), - } - self.parent.assertListEqual( - list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels] - ) - - def create_and_check_roberta_for_question_answering( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = TFRobertaForQuestionAnswering(config=config) - inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} - start_logits, end_logits = model(inputs) - result = { - "start_logits": start_logits.numpy(), - "end_logits": end_logits.numpy(), - } - self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length]) - self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length]) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - def setUp(self): - self.model_tester = TFRobertaModelTest.TFRobertaModelTester(self) + self.model_tester = TFRobertaModelTester(self) self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37) def test_config(self): diff --git a/tests/test_modeling_tf_t5.py b/tests/test_modeling_tf_t5.py index e533087c1c..d19e18d6d1 100644 --- a/tests/test_modeling_tf_t5.py +++ b/tests/test_modeling_tf_t5.py @@ -28,6 +28,186 @@ if is_tf_available(): from transformers import TFT5Model, TFT5ForConditionalGeneration, T5Tokenizer +class TFT5ModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_input_mask = True + self.use_labels = True + self.vocab_size = 99 + self.n_positions = 14 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.d_ff = 37 + self.relative_attention_num_buckets = 8 + self.dropout_rate = 0.1 + self.initializer_factor = 0.002 + self.eos_token_id = 1 + self.pad_token_id = 0 + self.scope = None + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_labels = None + if self.use_labels: + token_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + config = T5Config( + vocab_size=self.vocab_size, + n_positions=self.n_positions, + d_model=self.hidden_size, + d_ff=self.d_ff, + d_kv=self.hidden_size // self.num_attention_heads, + num_layers=self.num_hidden_layers, + 
num_heads=self.num_attention_heads, + relative_attention_num_buckets=self.relative_attention_num_buckets, + dropout_rate=self.dropout_rate, + initializer_factor=self.initializer_factor, + eos_token_id=self.eos_token_id, + bos_token_id=self.pad_token_id, + pad_token_id=self.pad_token_id, + ) + + return (config, input_ids, input_mask, token_labels) + + def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels): + model = TFT5Model(config=config) + inputs = { + "inputs": input_ids, + "decoder_input_ids": input_ids, + "decoder_attention_mask": input_mask, + } + decoder_output, decoder_past, encoder_output = model(inputs) + + decoder_output, decoder_past, encoder_output = model( + input_ids, decoder_attention_mask=input_mask, decoder_input_ids=input_ids + ) + result = { + "encoder_output": encoder_output.numpy(), + "decoder_past": decoder_past, + "decoder_output": decoder_output.numpy(), + } + self.parent.assertListEqual( + list(result["encoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] + ) + self.parent.assertListEqual( + list(result["decoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] + ) + self.parent.assertEqual(len(decoder_past), 2) + # decoder_past[0] should correspond to encoder output + self.parent.assertTrue(tf.reduce_all(tf.math.equal(decoder_past[0][0], encoder_output))) + # There should be `num_layers` key value embeddings stored in decoder_past[1] + self.parent.assertEqual(len(decoder_past[1]), config.num_layers) + # There should be a self attn key, a self attn value, a cross attn key and a cross attn value stored in each decoder_past[1] tuple + self.parent.assertEqual(len(decoder_past[1][0]), 4) + + def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels): + model = TFT5ForConditionalGeneration(config=config) + inputs_dict = { + "inputs": input_ids, + "decoder_input_ids": input_ids, + "decoder_attention_mask": input_mask, + } + + prediction_scores, _, _ = model(inputs_dict) + + result = { + "prediction_scores": prediction_scores.numpy(), + } + self.parent.assertListEqual( + list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + + def create_and_check_t5_decoder_model_past(self, config, input_ids, decoder_input_ids, attention_mask): + model = TFT5Model(config=config).get_decoder() + + input_ids = input_ids[:1, :] + self.batch_size = 1 + + # first forward pass + _, past_key_value_states = model(input_ids, use_cache=True) + + # create hypothetical next token and extent to next_input_ids + next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) + + # append to next input_ids and + next_input_ids = tf.concat([input_ids, next_tokens], axis=-1) + + output_from_no_past = model(next_input_ids)[0] + output_from_past = model(next_tokens, past_key_value_states=past_key_value_states)[0] + + # select random slice + random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1])) + output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx] + output_from_past_slice = output_from_past[:, 0, random_slice_idx] + + # test that outputs are equal for slice + tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3) + + def create_and_check_t5_decoder_model_attention_mask_past( + self, config, input_ids, decoder_input_ids, attention_mask + ): + model = TFT5Model(config=config).get_decoder() + + # create attention mask + half_seq_length = self.seq_length // 2 + attn_mask_begin = 
tf.ones((self.batch_size, half_seq_length), dtype=tf.int32) + attn_mask_end = tf.zeros((self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32) + attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1) + + # first forward pass + _, past_key_value_states = model(input_ids, attention_mask=attn_mask, use_cache=True) + + # create hypothetical next token and extent to next_input_ids + next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) + + # change a random masked slice from input_ids + random_seq_idx_to_change = ids_tensor((1,), half_seq_length).numpy() + 1 + random_other_next_tokens = ids_tensor((self.batch_size, self.seq_length), config.vocab_size) + vector_condition = tf.range(self.seq_length) == (self.seq_length - random_seq_idx_to_change) + condition = tf.transpose( + tf.broadcast_to(tf.expand_dims(vector_condition, -1), (self.seq_length, self.batch_size)) + ) + input_ids = tf.where(condition, random_other_next_tokens, input_ids) + + # append to next input_ids and attn_mask + next_input_ids = tf.concat([input_ids, next_tokens], axis=-1) + attn_mask = tf.concat([attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)], axis=1,) + + # get two different outputs + output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0] + output_from_past = model(next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask)[0] + + # select random slice + random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).numpy().item() + output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx] + output_from_past_slice = output_from_past[:, 0, random_slice_idx] + + # test that outputs are equal for slice + tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + (config, input_ids, input_mask, token_labels) = config_and_inputs + inputs_dict = { + "inputs": input_ids, + "decoder_input_ids": input_ids, + "decoder_attention_mask": input_mask, + "use_cache": tf.convert_to_tensor([False]), + } + return config, inputs_dict + + @require_tf class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase): @@ -35,207 +215,8 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (TFT5Model, TFT5ForConditionalGeneration) if is_tf_available() else () all_generative_model_classes = (TFT5ForConditionalGeneration,) if is_tf_available() else () - class TFT5ModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_labels=True, - vocab_size=99, - n_positions=14, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - d_ff=37, - relative_attention_num_buckets=8, - dropout_rate=0.1, - initializer_factor=0.002, - eos_token_id=1, - pad_token_id=0, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_labels = use_labels - self.vocab_size = vocab_size - self.n_positions = n_positions - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.d_ff = d_ff - self.relative_attention_num_buckets = relative_attention_num_buckets - self.dropout_rate = dropout_rate - self.initializer_factor = initializer_factor - self.eos_token_id = eos_token_id - self.pad_token_id = pad_token_id - self.scope = scope - 
- def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_labels = None - if self.use_labels: - token_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - config = T5Config( - vocab_size=self.vocab_size, - n_positions=self.n_positions, - d_model=self.hidden_size, - d_ff=self.d_ff, - d_kv=self.hidden_size // self.num_attention_heads, - num_layers=self.num_hidden_layers, - num_heads=self.num_attention_heads, - relative_attention_num_buckets=self.relative_attention_num_buckets, - dropout_rate=self.dropout_rate, - initializer_factor=self.initializer_factor, - eos_token_id=self.eos_token_id, - bos_token_id=self.pad_token_id, - pad_token_id=self.pad_token_id, - ) - - return (config, input_ids, input_mask, token_labels) - - def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels): - model = TFT5Model(config=config) - inputs = { - "inputs": input_ids, - "decoder_input_ids": input_ids, - "decoder_attention_mask": input_mask, - } - decoder_output, decoder_past, encoder_output = model(inputs) - - decoder_output, decoder_past, encoder_output = model( - input_ids, decoder_attention_mask=input_mask, decoder_input_ids=input_ids - ) - result = { - "encoder_output": encoder_output.numpy(), - "decoder_past": decoder_past, - "decoder_output": decoder_output.numpy(), - } - self.parent.assertListEqual( - list(result["encoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] - ) - self.parent.assertListEqual( - list(result["decoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] - ) - self.parent.assertEqual(len(decoder_past), 2) - # decoder_past[0] should correspond to encoder output - self.parent.assertTrue(tf.reduce_all(tf.math.equal(decoder_past[0][0], encoder_output))) - # There should be `num_layers` key value embeddings stored in decoder_past[1] - self.parent.assertEqual(len(decoder_past[1]), config.num_layers) - # There should be a self attn key, a self attn value, a cross attn key and a cross attn value stored in each decoder_past[1] tuple - self.parent.assertEqual(len(decoder_past[1][0]), 4) - - def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels): - model = TFT5ForConditionalGeneration(config=config) - inputs_dict = { - "inputs": input_ids, - "decoder_input_ids": input_ids, - "decoder_attention_mask": input_mask, - } - - prediction_scores, _, _ = model(inputs_dict) - - result = { - "prediction_scores": prediction_scores.numpy(), - } - self.parent.assertListEqual( - list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - - def create_and_check_t5_decoder_model_past(self, config, input_ids, decoder_input_ids, attention_mask): - model = TFT5Model(config=config).get_decoder() - - input_ids = input_ids[:1, :] - self.batch_size = 1 - - # first forward pass - _, past_key_value_states = model(input_ids, use_cache=True) - - # create hypothetical next token and extent to next_input_ids - next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) - - # append to next input_ids and - next_input_ids = tf.concat([input_ids, next_tokens], axis=-1) - - output_from_no_past = model(next_input_ids)[0] - output_from_past = model(next_tokens, past_key_value_states=past_key_value_states)[0] - - # select random slice - random_slice_idx = int(ids_tensor((1,), 
output_from_past.shape[-1])) - output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx] - output_from_past_slice = output_from_past[:, 0, random_slice_idx] - - # test that outputs are equal for slice - tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3) - - def create_and_check_t5_decoder_model_attention_mask_past( - self, config, input_ids, decoder_input_ids, attention_mask - ): - model = TFT5Model(config=config).get_decoder() - - # create attention mask - half_seq_length = self.seq_length // 2 - attn_mask_begin = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32) - attn_mask_end = tf.zeros((self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32) - attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1) - - # first forward pass - _, past_key_value_states = model(input_ids, attention_mask=attn_mask, use_cache=True) - - # create hypothetical next token and extent to next_input_ids - next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) - - # change a random masked slice from input_ids - random_seq_idx_to_change = ids_tensor((1,), half_seq_length).numpy() + 1 - random_other_next_tokens = ids_tensor((self.batch_size, self.seq_length), config.vocab_size) - vector_condition = tf.range(self.seq_length) == (self.seq_length - random_seq_idx_to_change) - condition = tf.transpose( - tf.broadcast_to(tf.expand_dims(vector_condition, -1), (self.seq_length, self.batch_size)) - ) - input_ids = tf.where(condition, random_other_next_tokens, input_ids) - - # append to next input_ids and attn_mask - next_input_ids = tf.concat([input_ids, next_tokens], axis=-1) - attn_mask = tf.concat([attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)], axis=1,) - - # get two different outputs - output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0] - output_from_past = model( - next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask - )[0] - - # select random slice - random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).numpy().item() - output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx] - output_from_past_slice = output_from_past[:, 0, random_slice_idx] - - # test that outputs are equal for slice - tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - (config, input_ids, input_mask, token_labels) = config_and_inputs - inputs_dict = { - "inputs": input_ids, - "decoder_input_ids": input_ids, - "decoder_attention_mask": input_mask, - "use_cache": tf.convert_to_tensor([False]), - } - return config, inputs_dict - def setUp(self): - self.model_tester = TFT5ModelTest.TFT5ModelTester(self) + self.model_tester = TFT5ModelTester(self) self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37) def test_config(self): diff --git a/tests/test_modeling_tf_transfo_xl.py b/tests/test_modeling_tf_transfo_xl.py index f1a0107106..438b92c46a 100644 --- a/tests/test_modeling_tf_transfo_xl.py +++ b/tests/test_modeling_tf_transfo_xl.py @@ -33,6 +33,135 @@ if is_tf_available(): ) +class TFTransfoXLModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.mem_len = 30 + self.key_length = self.seq_length + self.mem_len + self.clamp_len = 15 + self.is_training = True + self.use_labels = True + self.vocab_size = 99 + self.cutoffs = [10, 50, 80] + 
self.hidden_size = 32 + self.d_embed = 32 + self.num_attention_heads = 4 + self.d_head = 8 + self.d_inner = 128 + self.div_val = 2 + self.num_hidden_layers = 5 + self.scope = None + self.seed = 1 + self.eos_token_id = 0 + + def prepare_config_and_inputs(self): + input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + lm_labels = None + if self.use_labels: + lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + config = TransfoXLConfig( + vocab_size=self.vocab_size, + mem_len=self.mem_len, + clamp_len=self.clamp_len, + cutoffs=self.cutoffs, + d_model=self.hidden_size, + d_embed=self.d_embed, + n_head=self.num_attention_heads, + d_head=self.d_head, + d_inner=self.d_inner, + div_val=self.div_val, + n_layer=self.num_hidden_layers, + eos_token_id=self.eos_token_id, + ) + + return (config, input_ids_1, input_ids_2, lm_labels) + + def set_seed(self): + random.seed(self.seed) + tf.random.set_seed(self.seed) + + def create_and_check_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels): + model = TFTransfoXLModel(config) + + hidden_states_1, mems_1 = model(input_ids_1) + + inputs = {"input_ids": input_ids_2, "mems": mems_1} + + hidden_states_2, mems_2 = model(inputs) + + result = { + "hidden_states_1": hidden_states_1.numpy(), + "mems_1": [mem.numpy() for mem in mems_1], + "hidden_states_2": hidden_states_2.numpy(), + "mems_2": [mem.numpy() for mem in mems_2], + } + + self.parent.assertListEqual( + list(result["hidden_states_1"].shape), [self.batch_size, self.seq_length, self.hidden_size] + ) + self.parent.assertListEqual( + list(result["hidden_states_2"].shape), [self.batch_size, self.seq_length, self.hidden_size] + ) + self.parent.assertListEqual( + list(list(mem.shape) for mem in result["mems_1"]), + [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + self.parent.assertListEqual( + list(list(mem.shape) for mem in result["mems_2"]), + [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def create_and_check_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels): + model = TFTransfoXLLMHeadModel(config) + + lm_logits_1, mems_1 = model(input_ids_1) + + inputs = {"input_ids": input_ids_1, "labels": lm_labels} + _, mems_1 = model(inputs) + + lm_logits_2, mems_2 = model([input_ids_2, mems_1]) + + inputs = {"input_ids": input_ids_1, "mems": mems_1, "labels": lm_labels} + + _, mems_2 = model(inputs) + + result = { + "mems_1": [mem.numpy() for mem in mems_1], + "lm_logits_1": lm_logits_1.numpy(), + "mems_2": [mem.numpy() for mem in mems_2], + "lm_logits_2": lm_logits_2.numpy(), + } + + self.parent.assertListEqual( + list(result["lm_logits_1"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + self.parent.assertListEqual( + list(list(mem.shape) for mem in result["mems_1"]), + [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + self.parent.assertListEqual( + list(result["lm_logits_2"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + self.parent.assertListEqual( + list(list(mem.shape) for mem in result["mems_2"]), + [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + (config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs + inputs_dict = {"input_ids": input_ids_1} + return 
config, inputs_dict + + @require_tf class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase): @@ -43,155 +172,8 @@ class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase): test_torchscript = False test_resize_embeddings = False - class TFTransfoXLModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - mem_len=30, - clamp_len=15, - is_training=True, - use_labels=True, - vocab_size=99, - cutoffs=[10, 50, 80], - hidden_size=32, - d_embed=32, - num_attention_heads=4, - d_head=8, - d_inner=128, - div_val=2, - num_hidden_layers=5, - scope=None, - seed=1, - eos_token_id=0, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.mem_len = mem_len - self.key_length = seq_length + mem_len - self.clamp_len = clamp_len - self.is_training = is_training - self.use_labels = use_labels - self.vocab_size = vocab_size - self.cutoffs = cutoffs - self.hidden_size = hidden_size - self.d_embed = d_embed - self.num_attention_heads = num_attention_heads - self.d_head = d_head - self.d_inner = d_inner - self.div_val = div_val - self.num_hidden_layers = num_hidden_layers - self.scope = scope - self.seed = seed - self.eos_token_id = eos_token_id - - def prepare_config_and_inputs(self): - input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - lm_labels = None - if self.use_labels: - lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - config = TransfoXLConfig( - vocab_size=self.vocab_size, - mem_len=self.mem_len, - clamp_len=self.clamp_len, - cutoffs=self.cutoffs, - d_model=self.hidden_size, - d_embed=self.d_embed, - n_head=self.num_attention_heads, - d_head=self.d_head, - d_inner=self.d_inner, - div_val=self.div_val, - n_layer=self.num_hidden_layers, - eos_token_id=self.eos_token_id, - ) - - return (config, input_ids_1, input_ids_2, lm_labels) - - def set_seed(self): - random.seed(self.seed) - tf.random.set_seed(self.seed) - - def create_and_check_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels): - model = TFTransfoXLModel(config) - - hidden_states_1, mems_1 = model(input_ids_1) - - inputs = {"input_ids": input_ids_2, "mems": mems_1} - - hidden_states_2, mems_2 = model(inputs) - - result = { - "hidden_states_1": hidden_states_1.numpy(), - "mems_1": [mem.numpy() for mem in mems_1], - "hidden_states_2": hidden_states_2.numpy(), - "mems_2": [mem.numpy() for mem in mems_2], - } - - self.parent.assertListEqual( - list(result["hidden_states_1"].shape), [self.batch_size, self.seq_length, self.hidden_size] - ) - self.parent.assertListEqual( - list(result["hidden_states_2"].shape), [self.batch_size, self.seq_length, self.hidden_size] - ) - self.parent.assertListEqual( - list(list(mem.shape) for mem in result["mems_1"]), - [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - self.parent.assertListEqual( - list(list(mem.shape) for mem in result["mems_2"]), - [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def create_and_check_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels): - model = TFTransfoXLLMHeadModel(config) - - lm_logits_1, mems_1 = model(input_ids_1) - - inputs = {"input_ids": input_ids_1, "labels": lm_labels} - _, mems_1 = model(inputs) - - lm_logits_2, mems_2 = model([input_ids_2, mems_1]) - - inputs = {"input_ids": input_ids_1, "mems": mems_1, "labels": lm_labels} - - _, mems_2 = 
model(inputs) - - result = { - "mems_1": [mem.numpy() for mem in mems_1], - "lm_logits_1": lm_logits_1.numpy(), - "mems_2": [mem.numpy() for mem in mems_2], - "lm_logits_2": lm_logits_2.numpy(), - } - - self.parent.assertListEqual( - list(result["lm_logits_1"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - self.parent.assertListEqual( - list(list(mem.shape) for mem in result["mems_1"]), - [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - self.parent.assertListEqual( - list(result["lm_logits_2"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - self.parent.assertListEqual( - list(list(mem.shape) for mem in result["mems_2"]), - [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - (config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs - inputs_dict = {"input_ids": input_ids_1} - return config, inputs_dict - def setUp(self): - self.model_tester = TFTransfoXLModelTest.TFTransfoXLModelTester(self) + self.model_tester = TFTransfoXLModelTester(self) self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37) def test_config(self): diff --git a/tests/test_modeling_tf_xlm.py b/tests/test_modeling_tf_xlm.py index eff929858c..c608219aed 100644 --- a/tests/test_modeling_tf_xlm.py +++ b/tests/test_modeling_tf_xlm.py @@ -35,6 +35,211 @@ if is_tf_available(): ) +class TFXLMModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_input_lengths = True + self.use_token_type_ids = True + self.use_labels = True + self.gelu_activation = True + self.sinusoidal_embeddings = False + self.causal = False + self.asm = False + self.n_langs = 2 + self.vocab_size = 99 + self.n_special = 0 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_vocab_size = 16 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.summary_type = "last" + self.use_proj = True + self.scope = None + self.bos_token_id = 0 + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32) + + input_lengths = None + if self.use_input_lengths: + input_lengths = ( + ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2 + ) # small variation of seq_length + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs) + + sequence_labels = None + token_labels = None + is_impossible_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32) + + config = XLMConfig( + vocab_size=self.vocab_size, + n_special=self.n_special, + emb_dim=self.hidden_size, + n_layers=self.num_hidden_layers, + n_heads=self.num_attention_heads, + dropout=self.hidden_dropout_prob, + attention_dropout=self.attention_probs_dropout_prob, + gelu_activation=self.gelu_activation, + 
sinusoidal_embeddings=self.sinusoidal_embeddings, + asm=self.asm, + causal=self.causal, + n_langs=self.n_langs, + max_position_embeddings=self.max_position_embeddings, + initializer_range=self.initializer_range, + summary_type=self.summary_type, + use_proj=self.use_proj, + bos_token_id=self.bos_token_id, + ) + + return ( + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ) + + def create_and_check_xlm_model( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = TFXLMModel(config=config) + inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids} + outputs = model(inputs) + + inputs = [input_ids, input_mask] + outputs = model(inputs) + sequence_output = outputs[0] + result = { + "sequence_output": sequence_output.numpy(), + } + self.parent.assertListEqual( + list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] + ) + + def create_and_check_xlm_lm_head( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = TFXLMWithLMHeadModel(config) + + inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids} + outputs = model(inputs) + + logits = outputs[0] + + result = { + "logits": logits.numpy(), + } + + self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size]) + + def create_and_check_xlm_qa( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = TFXLMForQuestionAnsweringSimple(config) + + inputs = {"input_ids": input_ids, "lengths": input_lengths} + + start_logits, end_logits = model(inputs) + + result = { + "start_logits": start_logits.numpy(), + "end_logits": end_logits.numpy(), + } + + self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length]) + self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length]) + + def create_and_check_xlm_sequence_classif( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = TFXLMForSequenceClassification(config) + + inputs = {"input_ids": input_ids, "lengths": input_lengths} + + (logits,) = model(inputs) + + result = { + "logits": logits.numpy(), + } + + self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size]) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ) = config_and_inputs + inputs_dict = { + "input_ids": input_ids, + "token_type_ids": token_type_ids, + "langs": token_type_ids, + "lengths": input_lengths, + } + return config, inputs_dict + + @require_tf class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase): @@ -47,244 +252,8 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase): (TFXLMWithLMHeadModel,) if is_tf_available() else () ) # TODO (PVP): Check other models whether language generation is also applicable - class TFXLMModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - 
is_training=True, - use_input_lengths=True, - use_token_type_ids=True, - use_labels=True, - gelu_activation=True, - sinusoidal_embeddings=False, - causal=False, - asm=False, - n_langs=2, - vocab_size=99, - n_special=0, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - summary_type="last", - use_proj=True, - scope=None, - bos_token_id=0, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_lengths = use_input_lengths - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.gelu_activation = gelu_activation - self.sinusoidal_embeddings = sinusoidal_embeddings - self.asm = asm - self.n_langs = n_langs - self.vocab_size = vocab_size - self.n_special = n_special - self.summary_type = summary_type - self.causal = causal - self.use_proj = use_proj - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.n_langs = n_langs - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.summary_type = summary_type - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - self.bos_token_id = bos_token_id - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32) - - input_lengths = None - if self.use_input_lengths: - input_lengths = ( - ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2 - ) # small variation of seq_length - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs) - - sequence_labels = None - token_labels = None - is_impossible_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32) - - config = XLMConfig( - vocab_size=self.vocab_size, - n_special=self.n_special, - emb_dim=self.hidden_size, - n_layers=self.num_hidden_layers, - n_heads=self.num_attention_heads, - dropout=self.hidden_dropout_prob, - attention_dropout=self.attention_probs_dropout_prob, - gelu_activation=self.gelu_activation, - sinusoidal_embeddings=self.sinusoidal_embeddings, - asm=self.asm, - causal=self.causal, - n_langs=self.n_langs, - max_position_embeddings=self.max_position_embeddings, - initializer_range=self.initializer_range, - summary_type=self.summary_type, - use_proj=self.use_proj, - bos_token_id=self.bos_token_id, - ) - - return ( - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ) - - def create_and_check_xlm_model( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model = TFXLMModel(config=config) - inputs = {"input_ids": input_ids, 
"lengths": input_lengths, "langs": token_type_ids} - outputs = model(inputs) - - inputs = [input_ids, input_mask] - outputs = model(inputs) - sequence_output = outputs[0] - result = { - "sequence_output": sequence_output.numpy(), - } - self.parent.assertListEqual( - list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] - ) - - def create_and_check_xlm_lm_head( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model = TFXLMWithLMHeadModel(config) - - inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids} - outputs = model(inputs) - - logits = outputs[0] - - result = { - "logits": logits.numpy(), - } - - self.parent.assertListEqual( - list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - - def create_and_check_xlm_qa( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model = TFXLMForQuestionAnsweringSimple(config) - - inputs = {"input_ids": input_ids, "lengths": input_lengths} - - start_logits, end_logits = model(inputs) - - result = { - "start_logits": start_logits.numpy(), - "end_logits": end_logits.numpy(), - } - - self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length]) - self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length]) - - def create_and_check_xlm_sequence_classif( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model = TFXLMForSequenceClassification(config) - - inputs = {"input_ids": input_ids, "lengths": input_lengths} - - (logits,) = model(inputs) - - result = { - "logits": logits.numpy(), - } - - self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size]) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ) = config_and_inputs - inputs_dict = { - "input_ids": input_ids, - "token_type_ids": token_type_ids, - "langs": token_type_ids, - "lengths": input_lengths, - } - return config, inputs_dict - def setUp(self): - self.model_tester = TFXLMModelTest.TFXLMModelTester(self) + self.model_tester = TFXLMModelTester(self) self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37) def test_config(self): diff --git a/tests/test_modeling_tf_xlnet.py b/tests/test_modeling_tf_xlnet.py index 762f0b49b3..d9711a4f5d 100644 --- a/tests/test_modeling_tf_xlnet.py +++ b/tests/test_modeling_tf_xlnet.py @@ -37,6 +37,304 @@ if is_tf_available(): ) +class TFXLNetModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.mem_len = 10 + # self.key_len = seq_length + mem_len + self.clamp_len = -1 + self.reuse_len = 15 + self.is_training = True + self.use_labels = True + self.vocab_size = 99 + self.cutoffs = [10, 50, 80] + self.hidden_size = 32 + self.num_attention_heads = 4 + self.d_inner = 128 + self.num_hidden_layers = 5 + self.type_sequence_label_size = 2 + self.untie_r = True + self.bi_data = False + self.same_length = False + self.initializer_range = 0.05 + self.seed = 1 + self.type_vocab_size = 2 + 
self.bos_token_id = 1 + self.eos_token_id = 2 + self.pad_token_id = 5 + + def prepare_config_and_inputs(self): + input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32) + + input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size) + perm_mask = tf.zeros((self.batch_size, self.seq_length + 1, self.seq_length), dtype=tf.float32) + perm_mask_last = tf.ones((self.batch_size, self.seq_length + 1, 1), dtype=tf.float32) + perm_mask = tf.concat([perm_mask, perm_mask_last], axis=-1) + # perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token + target_mapping = tf.zeros((self.batch_size, 1, self.seq_length), dtype=tf.float32) + target_mapping_last = tf.ones((self.batch_size, 1, 1), dtype=tf.float32) + target_mapping = tf.concat([target_mapping, target_mapping_last], axis=-1) + # target_mapping[:, 0, -1] = 1.0 # predict last token + + sequence_labels = None + lm_labels = None + is_impossible_labels = None + if self.use_labels: + lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32) + + config = XLNetConfig( + vocab_size=self.vocab_size, + d_model=self.hidden_size, + n_head=self.num_attention_heads, + d_inner=self.d_inner, + n_layer=self.num_hidden_layers, + untie_r=self.untie_r, + mem_len=self.mem_len, + clamp_len=self.clamp_len, + same_length=self.same_length, + reuse_len=self.reuse_len, + bi_data=self.bi_data, + initializer_range=self.initializer_range, + num_labels=self.type_sequence_label_size, + bos_token_id=self.bos_token_id, + pad_token_id=self.pad_token_id, + eos_token_id=self.eos_token_id, + ) + + return ( + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + ) + + def set_seed(self): + random.seed(self.seed) + tf.random.set_seed(self.seed) + + def create_and_check_xlnet_base_model( + self, + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + ): + model = TFXLNetModel(config) + + inputs = {"input_ids": input_ids_1, "input_mask": input_mask, "token_type_ids": segment_ids} + + _, _ = model(inputs) + + inputs = [input_ids_1, input_mask] + + outputs, mems_1 = model(inputs) + + result = { + "mems_1": [mem.numpy() for mem in mems_1], + "outputs": outputs.numpy(), + } + + config.mem_len = 0 + model = TFXLNetModel(config) + no_mems_outputs = model(inputs) + self.parent.assertEqual(len(no_mems_outputs), 1) + + self.parent.assertListEqual( + list(result["outputs"].shape), [self.batch_size, self.seq_length, self.hidden_size] + ) + self.parent.assertListEqual( + list(list(mem.shape) for mem in result["mems_1"]), + [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def create_and_check_xlnet_lm_head( + self, + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + ): + model = TFXLNetLMHeadModel(config) + + inputs_1 = {"input_ids": input_ids_1, 
"token_type_ids": segment_ids} + + all_logits_1, mems_1 = model(inputs_1) + + inputs_2 = {"input_ids": input_ids_2, "mems": mems_1, "token_type_ids": segment_ids} + + all_logits_2, mems_2 = model(inputs_2) + + inputs_3 = {"input_ids": input_ids_q, "perm_mask": perm_mask, "target_mapping": target_mapping} + + logits, _ = model(inputs_3) + + result = { + "mems_1": [mem.numpy() for mem in mems_1], + "all_logits_1": all_logits_1.numpy(), + "mems_2": [mem.numpy() for mem in mems_2], + "all_logits_2": all_logits_2.numpy(), + } + + self.parent.assertListEqual( + list(result["all_logits_1"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + self.parent.assertListEqual( + list(list(mem.shape) for mem in result["mems_1"]), + [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + self.parent.assertListEqual( + list(result["all_logits_2"].shape), [self.batch_size, self.seq_length, self.vocab_size] + ) + self.parent.assertListEqual( + list(list(mem.shape) for mem in result["mems_2"]), + [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def create_and_check_xlnet_qa( + self, + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + ): + model = TFXLNetForQuestionAnsweringSimple(config) + + inputs = {"input_ids": input_ids_1, "attention_mask": input_mask, "token_type_ids": segment_ids} + start_logits, end_logits, mems = model(inputs) + + result = { + "start_logits": start_logits.numpy(), + "end_logits": end_logits.numpy(), + "mems": [m.numpy() for m in mems], + } + + self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length]) + self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length]) + self.parent.assertListEqual( + list(list(mem.shape) for mem in result["mems"]), + [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def create_and_check_xlnet_sequence_classif( + self, + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + ): + model = TFXLNetForSequenceClassification(config) + + logits, mems_1 = model(input_ids_1) + + result = { + "mems_1": [mem.numpy() for mem in mems_1], + "logits": logits.numpy(), + } + + self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size]) + self.parent.assertListEqual( + list(list(mem.shape) for mem in result["mems_1"]), + [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def create_and_check_xlnet_for_token_classification( + self, + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + ): + config.num_labels = input_ids_1.shape[1] + model = TFXLNetForTokenClassification(config) + inputs = { + "input_ids": input_ids_1, + "attention_mask": input_mask, + # 'token_type_ids': token_type_ids + } + logits, mems_1 = model(inputs) + result = { + "mems_1": [mem.numpy() for mem in mems_1], + "logits": logits.numpy(), + } + self.parent.assertListEqual( + list(result["logits"].shape), [self.batch_size, self.seq_length, config.num_labels] + ) + self.parent.assertListEqual( + list(list(mem.shape) for mem in result["mems_1"]), + [[self.seq_length, 
self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + ) = config_and_inputs + inputs_dict = {"input_ids": input_ids_1} + return config, inputs_dict + + @require_tf class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase): @@ -56,329 +354,8 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase): ) # TODO (PVP): Check other models whether language generation is also applicable test_pruning = False - class TFXLNetModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - mem_len=10, - clamp_len=-1, - reuse_len=15, - is_training=True, - use_labels=True, - vocab_size=99, - cutoffs=[10, 50, 80], - hidden_size=32, - num_attention_heads=4, - d_inner=128, - num_hidden_layers=5, - type_sequence_label_size=2, - untie_r=True, - bi_data=False, - same_length=False, - initializer_range=0.05, - seed=1, - type_vocab_size=2, - bos_token_id=1, - eos_token_id=2, - pad_token_id=5, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.mem_len = mem_len - # self.key_len = seq_length + mem_len - self.clamp_len = clamp_len - self.reuse_len = reuse_len - self.is_training = is_training - self.use_labels = use_labels - self.vocab_size = vocab_size - self.cutoffs = cutoffs - self.hidden_size = hidden_size - self.num_attention_heads = num_attention_heads - self.d_inner = d_inner - self.num_hidden_layers = num_hidden_layers - self.bi_data = bi_data - self.untie_r = untie_r - self.same_length = same_length - self.initializer_range = initializer_range - self.seed = seed - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.bos_token_id = bos_token_id - self.pad_token_id = pad_token_id - self.eos_token_id = eos_token_id - - def prepare_config_and_inputs(self): - input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32) - - input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size) - perm_mask = tf.zeros((self.batch_size, self.seq_length + 1, self.seq_length), dtype=tf.float32) - perm_mask_last = tf.ones((self.batch_size, self.seq_length + 1, 1), dtype=tf.float32) - perm_mask = tf.concat([perm_mask, perm_mask_last], axis=-1) - # perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token - target_mapping = tf.zeros((self.batch_size, 1, self.seq_length), dtype=tf.float32) - target_mapping_last = tf.ones((self.batch_size, 1, 1), dtype=tf.float32) - target_mapping = tf.concat([target_mapping, target_mapping_last], axis=-1) - # target_mapping[:, 0, -1] = 1.0 # predict last token - - sequence_labels = None - lm_labels = None - is_impossible_labels = None - if self.use_labels: - lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32) - - config = XLNetConfig( - vocab_size=self.vocab_size, - d_model=self.hidden_size, - 
n_head=self.num_attention_heads, - d_inner=self.d_inner, - n_layer=self.num_hidden_layers, - untie_r=self.untie_r, - mem_len=self.mem_len, - clamp_len=self.clamp_len, - same_length=self.same_length, - reuse_len=self.reuse_len, - bi_data=self.bi_data, - initializer_range=self.initializer_range, - num_labels=self.type_sequence_label_size, - bos_token_id=self.bos_token_id, - pad_token_id=self.pad_token_id, - eos_token_id=self.eos_token_id, - ) - - return ( - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - ) - - def set_seed(self): - random.seed(self.seed) - tf.random.set_seed(self.seed) - - def create_and_check_xlnet_base_model( - self, - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - ): - model = TFXLNetModel(config) - - inputs = {"input_ids": input_ids_1, "input_mask": input_mask, "token_type_ids": segment_ids} - - _, _ = model(inputs) - - inputs = [input_ids_1, input_mask] - - outputs, mems_1 = model(inputs) - - result = { - "mems_1": [mem.numpy() for mem in mems_1], - "outputs": outputs.numpy(), - } - - config.mem_len = 0 - model = TFXLNetModel(config) - no_mems_outputs = model(inputs) - self.parent.assertEqual(len(no_mems_outputs), 1) - - self.parent.assertListEqual( - list(result["outputs"].shape), [self.batch_size, self.seq_length, self.hidden_size] - ) - self.parent.assertListEqual( - list(list(mem.shape) for mem in result["mems_1"]), - [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def create_and_check_xlnet_lm_head( - self, - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - ): - model = TFXLNetLMHeadModel(config) - - inputs_1 = {"input_ids": input_ids_1, "token_type_ids": segment_ids} - - all_logits_1, mems_1 = model(inputs_1) - - inputs_2 = {"input_ids": input_ids_2, "mems": mems_1, "token_type_ids": segment_ids} - - all_logits_2, mems_2 = model(inputs_2) - - inputs_3 = {"input_ids": input_ids_q, "perm_mask": perm_mask, "target_mapping": target_mapping} - - logits, _ = model(inputs_3) - - result = { - "mems_1": [mem.numpy() for mem in mems_1], - "all_logits_1": all_logits_1.numpy(), - "mems_2": [mem.numpy() for mem in mems_2], - "all_logits_2": all_logits_2.numpy(), - } - - self.parent.assertListEqual( - list(result["all_logits_1"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - self.parent.assertListEqual( - list(list(mem.shape) for mem in result["mems_1"]), - [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - self.parent.assertListEqual( - list(result["all_logits_2"].shape), [self.batch_size, self.seq_length, self.vocab_size] - ) - self.parent.assertListEqual( - list(list(mem.shape) for mem in result["mems_2"]), - [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def create_and_check_xlnet_qa( - self, - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - ): - model = TFXLNetForQuestionAnsweringSimple(config) - - inputs = {"input_ids": input_ids_1, "attention_mask": input_mask, "token_type_ids": segment_ids} - start_logits, end_logits, mems = model(inputs) - - result = { 
- "start_logits": start_logits.numpy(), - "end_logits": end_logits.numpy(), - "mems": [m.numpy() for m in mems], - } - - self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length]) - self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length]) - self.parent.assertListEqual( - list(list(mem.shape) for mem in result["mems"]), - [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def create_and_check_xlnet_sequence_classif( - self, - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - ): - model = TFXLNetForSequenceClassification(config) - - logits, mems_1 = model(input_ids_1) - - result = { - "mems_1": [mem.numpy() for mem in mems_1], - "logits": logits.numpy(), - } - - self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size]) - self.parent.assertListEqual( - list(list(mem.shape) for mem in result["mems_1"]), - [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def create_and_check_xlnet_for_token_classification( - self, - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - ): - config.num_labels = input_ids_1.shape[1] - model = TFXLNetForTokenClassification(config) - inputs = { - "input_ids": input_ids_1, - "attention_mask": input_mask, - # 'token_type_ids': token_type_ids - } - logits, mems_1 = model(inputs) - result = { - "mems_1": [mem.numpy() for mem in mems_1], - "logits": logits.numpy(), - } - self.parent.assertListEqual( - list(result["logits"].shape), [self.batch_size, self.seq_length, config.num_labels] - ) - self.parent.assertListEqual( - list(list(mem.shape) for mem in result["mems_1"]), - [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids_1} - return config, inputs_dict - def setUp(self): - self.model_tester = TFXLNetModelTest.TFXLNetModelTester(self) + self.model_tester = TFXLNetModelTester(self) self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37) def test_config(self): diff --git a/tests/test_modeling_transfo_xl.py b/tests/test_modeling_transfo_xl.py index adc2436239..0ec552da07 100644 --- a/tests/test_modeling_transfo_xl.py +++ b/tests/test_modeling_transfo_xl.py @@ -29,6 +29,137 @@ if is_torch_available(): from transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST +class TransfoXLModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 14 + self.seq_length = 7 + self.mem_len = 30 + self.key_length = self.seq_length + self.mem_len + self.clamp_len = 15 + self.is_training = True + self.use_labels = True + self.vocab_size = 99 + self.cutoffs = [10, 50, 80] + self.hidden_size = 32 + self.d_embed = 32 + self.num_attention_heads = 4 + self.d_head = 8 + self.d_inner = 128 + self.div_val = 2 + self.num_hidden_layers = 5 + self.scope = None + self.seed = 1 + self.eos_token_id = 0 + + def 
prepare_config_and_inputs(self): + input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + lm_labels = None + if self.use_labels: + lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + config = TransfoXLConfig( + vocab_size=self.vocab_size, + mem_len=self.mem_len, + clamp_len=self.clamp_len, + cutoffs=self.cutoffs, + d_model=self.hidden_size, + d_embed=self.d_embed, + n_head=self.num_attention_heads, + d_head=self.d_head, + d_inner=self.d_inner, + div_val=self.div_val, + n_layer=self.num_hidden_layers, + eos_token_id=self.eos_token_id, + ) + + return (config, input_ids_1, input_ids_2, lm_labels) + + def set_seed(self): + random.seed(self.seed) + torch.manual_seed(self.seed) + + def create_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels): + model = TransfoXLModel(config) + model.to(torch_device) + model.eval() + + hidden_states_1, mems_1 = model(input_ids_1) + hidden_states_2, mems_2 = model(input_ids_2, mems_1) + outputs = { + "hidden_states_1": hidden_states_1, + "mems_1": mems_1, + "hidden_states_2": hidden_states_2, + "mems_2": mems_2, + } + return outputs + + def check_transfo_xl_model_output(self, result): + self.parent.assertListEqual( + list(result["hidden_states_1"].size()), [self.batch_size, self.seq_length, self.hidden_size], + ) + self.parent.assertListEqual( + list(result["hidden_states_2"].size()), [self.batch_size, self.seq_length, self.hidden_size], + ) + self.parent.assertListEqual( + list(list(mem.size()) for mem in result["mems_1"]), + [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + self.parent.assertListEqual( + list(list(mem.size()) for mem in result["mems_2"]), + [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def create_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels): + model = TransfoXLLMHeadModel(config) + model.to(torch_device) + model.eval() + + lm_logits_1, mems_1 = model(input_ids_1) + loss_1, _, mems_1 = model(input_ids_1, labels=lm_labels) + lm_logits_2, mems_2 = model(input_ids_2, mems=mems_1) + loss_2, _, mems_2 = model(input_ids_2, labels=lm_labels, mems=mems_1) + + outputs = { + "loss_1": loss_1, + "mems_1": mems_1, + "lm_logits_1": lm_logits_1, + "loss_2": loss_2, + "mems_2": mems_2, + "lm_logits_2": lm_logits_2, + } + return outputs + + def check_transfo_xl_lm_head_output(self, result): + self.parent.assertListEqual(list(result["loss_1"].size()), [self.batch_size, self.seq_length - 1]) + self.parent.assertListEqual( + list(result["lm_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size], + ) + self.parent.assertListEqual( + list(list(mem.size()) for mem in result["mems_1"]), + [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + self.parent.assertListEqual(list(result["loss_2"].size()), [self.batch_size, self.seq_length - 1]) + self.parent.assertListEqual( + list(result["lm_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size], + ) + self.parent.assertListEqual( + list(list(mem.size()) for mem in result["mems_2"]), + [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + (config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs + inputs_dict = {"input_ids": input_ids_1} + return config, inputs_dict + + 
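The hunks above and below apply the same mechanical refactor to every model: a tester class that previously lived nested inside its TestCase, taking its sizes and flags as keyword arguments with defaults, is promoted to a module-level class whose __init__ accepts only parent and hard-codes the same values, and setUp switches from FooModelTest.FooModelTester(self) to FooModelTester(self). The sketch below only illustrates that pattern; it is not part of the patch, and FooModelTester / FooModelTest are placeholder names standing in for the real classes such as TransfoXLModelTester / TransfoXLModelTest.

import unittest

# Before the refactor the tester was nested inside the TestCase and
# parameterized through keyword arguments:
#
#     class FooModelTest(unittest.TestCase):
#         class FooModelTester(object):
#             def __init__(self, parent, batch_size=13, seq_length=7):
#                 self.parent = parent
#                 self.batch_size = batch_size
#                 self.seq_length = seq_length
#
#         def setUp(self):
#             self.model_tester = FooModelTest.FooModelTester(self)
#
# After the refactor the tester is module-level and hard-codes its defaults.
class FooModelTester:
    def __init__(self, parent):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7


class FooModelTest(unittest.TestCase):
    def setUp(self):
        self.model_tester = FooModelTester(self)

    def test_tester_defaults(self):
        # The real tests go on to call prepare_config_and_inputs() and the
        # create_and_check_* helpers on the tester; this placeholder only
        # exercises the new setUp wiring.
        self.assertEqual(self.model_tester.batch_size, 13)


if __name__ == "__main__":
    unittest.main()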
@require_torch class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase): @@ -38,155 +169,6 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase): test_torchscript = False test_resize_embeddings = True - class TransfoXLModelTester(object): - def __init__( - self, - parent, - batch_size=14, - seq_length=7, - mem_len=30, - clamp_len=15, - is_training=True, - use_labels=True, - vocab_size=99, - cutoffs=[10, 50, 80], - hidden_size=32, - d_embed=32, - num_attention_heads=4, - d_head=8, - d_inner=128, - div_val=2, - num_hidden_layers=5, - scope=None, - seed=1, - eos_token_id=0, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.mem_len = mem_len - self.key_length = seq_length + mem_len - self.clamp_len = clamp_len - self.is_training = is_training - self.use_labels = use_labels - self.vocab_size = vocab_size - self.cutoffs = cutoffs - self.hidden_size = hidden_size - self.d_embed = d_embed - self.num_attention_heads = num_attention_heads - self.d_head = d_head - self.d_inner = d_inner - self.div_val = div_val - self.num_hidden_layers = num_hidden_layers - self.scope = scope - self.seed = seed - self.eos_token_id = eos_token_id - - def prepare_config_and_inputs(self): - input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - lm_labels = None - if self.use_labels: - lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - config = TransfoXLConfig( - vocab_size=self.vocab_size, - mem_len=self.mem_len, - clamp_len=self.clamp_len, - cutoffs=self.cutoffs, - d_model=self.hidden_size, - d_embed=self.d_embed, - n_head=self.num_attention_heads, - d_head=self.d_head, - d_inner=self.d_inner, - div_val=self.div_val, - n_layer=self.num_hidden_layers, - eos_token_id=self.eos_token_id, - ) - - return (config, input_ids_1, input_ids_2, lm_labels) - - def set_seed(self): - random.seed(self.seed) - torch.manual_seed(self.seed) - - def create_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels): - model = TransfoXLModel(config) - model.to(torch_device) - model.eval() - - hidden_states_1, mems_1 = model(input_ids_1) - hidden_states_2, mems_2 = model(input_ids_2, mems_1) - outputs = { - "hidden_states_1": hidden_states_1, - "mems_1": mems_1, - "hidden_states_2": hidden_states_2, - "mems_2": mems_2, - } - return outputs - - def check_transfo_xl_model_output(self, result): - self.parent.assertListEqual( - list(result["hidden_states_1"].size()), [self.batch_size, self.seq_length, self.hidden_size], - ) - self.parent.assertListEqual( - list(result["hidden_states_2"].size()), [self.batch_size, self.seq_length, self.hidden_size], - ) - self.parent.assertListEqual( - list(list(mem.size()) for mem in result["mems_1"]), - [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - self.parent.assertListEqual( - list(list(mem.size()) for mem in result["mems_2"]), - [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def create_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels): - model = TransfoXLLMHeadModel(config) - model.to(torch_device) - model.eval() - - lm_logits_1, mems_1 = model(input_ids_1) - loss_1, _, mems_1 = model(input_ids_1, labels=lm_labels) - lm_logits_2, mems_2 = model(input_ids_2, mems=mems_1) - loss_2, _, mems_2 = model(input_ids_2, labels=lm_labels, mems=mems_1) - - outputs = { - "loss_1": loss_1, - "mems_1": mems_1, - 
"lm_logits_1": lm_logits_1, - "loss_2": loss_2, - "mems_2": mems_2, - "lm_logits_2": lm_logits_2, - } - return outputs - - def check_transfo_xl_lm_head_output(self, result): - self.parent.assertListEqual(list(result["loss_1"].size()), [self.batch_size, self.seq_length - 1]) - self.parent.assertListEqual( - list(result["lm_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size], - ) - self.parent.assertListEqual( - list(list(mem.size()) for mem in result["mems_1"]), - [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - self.parent.assertListEqual(list(result["loss_2"].size()), [self.batch_size, self.seq_length - 1]) - self.parent.assertListEqual( - list(result["lm_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size], - ) - self.parent.assertListEqual( - list(list(mem.size()) for mem in result["mems_2"]), - [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - (config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs - inputs_dict = {"input_ids": input_ids_1} - return config, inputs_dict - def check_cutoffs_and_n_token( self, copied_cutoffs, layer, model_embed, model, model_class, resized_value, vocab_size ): @@ -210,7 +192,7 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase): self.assertEqual(model.crit.n_token, vocab_size + resized_value) def setUp(self): - self.model_tester = TransfoXLModelTest.TransfoXLModelTester(self) + self.model_tester = TransfoXLModelTester(self) self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37) def test_config(self): diff --git a/tests/test_modeling_xlm.py b/tests/test_modeling_xlm.py index 5d79ce3ae3..52432113ad 100644 --- a/tests/test_modeling_xlm.py +++ b/tests/test_modeling_xlm.py @@ -37,6 +37,306 @@ if is_torch_available(): from transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_LIST +class XLMModelTester: + def __init__( + self, parent, + ): + self.parent = parent + self.batch_size = 13 + self.seq_length = 7 + self.is_training = True + self.use_input_lengths = True + self.use_token_type_ids = True + self.use_labels = True + self.gelu_activation = True + self.sinusoidal_embeddings = False + self.causal = False + self.asm = False + self.n_langs = 2 + self.vocab_size = 99 + self.n_special = 0 + self.hidden_size = 32 + self.num_hidden_layers = 5 + self.num_attention_heads = 4 + self.hidden_dropout_prob = 0.1 + self.attention_probs_dropout_prob = 0.1 + self.max_position_embeddings = 512 + self.type_sequence_label_size = 2 + self.initializer_range = 0.02 + self.num_labels = 3 + self.num_choices = 4 + self.summary_type = "last" + self.use_proj = True + self.scope = None + self.bos_token_id = 0 + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float() + + input_lengths = None + if self.use_input_lengths: + input_lengths = ( + ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2 + ) # small variation of seq_length + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs) + + sequence_labels = None + token_labels = None + is_impossible_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = 
ids_tensor([self.batch_size, self.seq_length], self.num_labels) + is_impossible_labels = ids_tensor([self.batch_size], 2).float() + + config = XLMConfig( + vocab_size=self.vocab_size, + n_special=self.n_special, + emb_dim=self.hidden_size, + n_layers=self.num_hidden_layers, + n_heads=self.num_attention_heads, + dropout=self.hidden_dropout_prob, + attention_dropout=self.attention_probs_dropout_prob, + gelu_activation=self.gelu_activation, + sinusoidal_embeddings=self.sinusoidal_embeddings, + asm=self.asm, + causal=self.causal, + n_langs=self.n_langs, + max_position_embeddings=self.max_position_embeddings, + initializer_range=self.initializer_range, + summary_type=self.summary_type, + use_proj=self.use_proj, + bos_token_id=self.bos_token_id, + ) + + return ( + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ) + + def check_loss_output(self, result): + self.parent.assertListEqual(list(result["loss"].size()), []) + + def create_and_check_xlm_model( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = XLMModel(config=config) + model.to(torch_device) + model.eval() + outputs = model(input_ids, lengths=input_lengths, langs=token_type_ids) + outputs = model(input_ids, langs=token_type_ids) + outputs = model(input_ids) + sequence_output = outputs[0] + result = { + "sequence_output": sequence_output, + } + self.parent.assertListEqual( + list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size] + ) + + def create_and_check_xlm_lm_head( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = XLMWithLMHeadModel(config) + model.to(torch_device) + model.eval() + + loss, logits = model(input_ids, token_type_ids=token_type_ids, labels=token_labels) + + result = { + "loss": loss, + "logits": logits, + } + + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]) + + def create_and_check_xlm_simple_qa( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = XLMForQuestionAnsweringSimple(config) + model.to(torch_device) + model.eval() + + outputs = model(input_ids) + + outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels) + loss, start_logits, end_logits = outputs + + result = { + "loss": loss, + "start_logits": start_logits, + "end_logits": end_logits, + } + self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length]) + self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length]) + self.check_loss_output(result) + + def create_and_check_xlm_qa( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = XLMForQuestionAnswering(config) + model.to(torch_device) + model.eval() + + outputs = model(input_ids) + start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = outputs + + outputs = model( + input_ids, + start_positions=sequence_labels, + end_positions=sequence_labels, + cls_index=sequence_labels, + is_impossible=is_impossible_labels, + 
p_mask=input_mask, + ) + + outputs = model( + input_ids, + start_positions=sequence_labels, + end_positions=sequence_labels, + cls_index=sequence_labels, + is_impossible=is_impossible_labels, + ) + + (total_loss,) = outputs + + outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels) + + (total_loss,) = outputs + + result = { + "loss": total_loss, + "start_top_log_probs": start_top_log_probs, + "start_top_index": start_top_index, + "end_top_log_probs": end_top_log_probs, + "end_top_index": end_top_index, + "cls_logits": cls_logits, + } + + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual( + list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top] + ) + self.parent.assertListEqual( + list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top] + ) + self.parent.assertListEqual( + list(result["end_top_log_probs"].size()), + [self.batch_size, model.config.start_n_top * model.config.end_n_top], + ) + self.parent.assertListEqual( + list(result["end_top_index"].size()), [self.batch_size, model.config.start_n_top * model.config.end_n_top], + ) + self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size]) + + def create_and_check_xlm_sequence_classif( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + model = XLMForSequenceClassification(config) + model.to(torch_device) + model.eval() + + (logits,) = model(input_ids) + loss, logits = model(input_ids, labels=sequence_labels) + + result = { + "loss": loss, + "logits": logits, + } + + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size]) + + def create_and_check_xlm_for_token_classification( + self, + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ): + config.num_labels = self.num_labels + model = XLMForTokenClassification(config) + model.to(torch_device) + model.eval() + + loss, logits = model(input_ids, attention_mask=input_mask, labels=token_labels) + result = { + "loss": loss, + "logits": logits, + } + self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]) + self.check_loss_output(result) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + input_mask, + ) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "lengths": input_lengths} + return config, inputs_dict + + @require_torch class XLMModelTest(ModelTesterMixin, unittest.TestCase): @@ -55,345 +355,8 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase): (XLMWithLMHeadModel,) if is_torch_available() else () ) # TODO (PVP): Check other models whether language generation is also applicable - class XLMModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_lengths=True, - use_token_type_ids=True, - use_labels=True, - gelu_activation=True, - sinusoidal_embeddings=False, - causal=False, - asm=False, - n_langs=2, - vocab_size=99, - n_special=0, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - 
hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - summary_type="last", - use_proj=True, - scope=None, - bos_token_id=0, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_lengths = use_input_lengths - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.gelu_activation = gelu_activation - self.sinusoidal_embeddings = sinusoidal_embeddings - self.asm = asm - self.n_langs = n_langs - self.vocab_size = vocab_size - self.n_special = n_special - self.summary_type = summary_type - self.causal = causal - self.use_proj = use_proj - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.n_langs = n_langs - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.summary_type = summary_type - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - self.bos_token_id = bos_token_id - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float() - - input_lengths = None - if self.use_input_lengths: - input_lengths = ( - ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2 - ) # small variation of seq_length - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs) - - sequence_labels = None - token_labels = None - is_impossible_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - is_impossible_labels = ids_tensor([self.batch_size], 2).float() - - config = XLMConfig( - vocab_size=self.vocab_size, - n_special=self.n_special, - emb_dim=self.hidden_size, - n_layers=self.num_hidden_layers, - n_heads=self.num_attention_heads, - dropout=self.hidden_dropout_prob, - attention_dropout=self.attention_probs_dropout_prob, - gelu_activation=self.gelu_activation, - sinusoidal_embeddings=self.sinusoidal_embeddings, - asm=self.asm, - causal=self.causal, - n_langs=self.n_langs, - max_position_embeddings=self.max_position_embeddings, - initializer_range=self.initializer_range, - summary_type=self.summary_type, - use_proj=self.use_proj, - bos_token_id=self.bos_token_id, - ) - - return ( - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ) - - def check_loss_output(self, result): - self.parent.assertListEqual(list(result["loss"].size()), []) - - def create_and_check_xlm_model( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model = XLMModel(config=config) - model.to(torch_device) - model.eval() - outputs = model(input_ids, lengths=input_lengths, langs=token_type_ids) - outputs = model(input_ids, langs=token_type_ids) - outputs = model(input_ids) - sequence_output = outputs[0] - result = { - 
"sequence_output": sequence_output, - } - self.parent.assertListEqual( - list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size] - ) - - def create_and_check_xlm_lm_head( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model = XLMWithLMHeadModel(config) - model.to(torch_device) - model.eval() - - loss, logits = model(input_ids, token_type_ids=token_type_ids, labels=token_labels) - - result = { - "loss": loss, - "logits": logits, - } - - self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["logits"].size()), [self.batch_size, self.seq_length, self.vocab_size] - ) - - def create_and_check_xlm_simple_qa( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model = XLMForQuestionAnsweringSimple(config) - model.to(torch_device) - model.eval() - - outputs = model(input_ids) - - outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels) - loss, start_logits, end_logits = outputs - - result = { - "loss": loss, - "start_logits": start_logits, - "end_logits": end_logits, - } - self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length]) - self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length]) - self.check_loss_output(result) - - def create_and_check_xlm_qa( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model = XLMForQuestionAnswering(config) - model.to(torch_device) - model.eval() - - outputs = model(input_ids) - start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = outputs - - outputs = model( - input_ids, - start_positions=sequence_labels, - end_positions=sequence_labels, - cls_index=sequence_labels, - is_impossible=is_impossible_labels, - p_mask=input_mask, - ) - - outputs = model( - input_ids, - start_positions=sequence_labels, - end_positions=sequence_labels, - cls_index=sequence_labels, - is_impossible=is_impossible_labels, - ) - - (total_loss,) = outputs - - outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels) - - (total_loss,) = outputs - - result = { - "loss": total_loss, - "start_top_log_probs": start_top_log_probs, - "start_top_index": start_top_index, - "end_top_log_probs": end_top_log_probs, - "end_top_index": end_top_index, - "cls_logits": cls_logits, - } - - self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top] - ) - self.parent.assertListEqual( - list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top] - ) - self.parent.assertListEqual( - list(result["end_top_log_probs"].size()), - [self.batch_size, model.config.start_n_top * model.config.end_n_top], - ) - self.parent.assertListEqual( - list(result["end_top_index"].size()), - [self.batch_size, model.config.start_n_top * model.config.end_n_top], - ) - self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size]) - - def create_and_check_xlm_sequence_classif( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - model 
= XLMForSequenceClassification(config) - model.to(torch_device) - model.eval() - - (logits,) = model(input_ids) - loss, logits = model(input_ids, labels=sequence_labels) - - result = { - "loss": loss, - "logits": logits, - } - - self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size] - ) - - def create_and_check_xlm_for_token_classification( - self, - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ): - config.num_labels = self.num_labels - model = XLMForTokenClassification(config) - model.to(torch_device) - model.eval() - - loss, logits = model(input_ids, attention_mask=input_mask, labels=token_labels) - result = { - "loss": loss, - "logits": logits, - } - self.parent.assertListEqual( - list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels] - ) - self.check_loss_output(result) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - input_mask, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "lengths": input_lengths} - return config, inputs_dict - def setUp(self): - self.model_tester = XLMModelTest.XLMModelTester(self) + self.model_tester = XLMModelTester(self) self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37) def test_config(self): diff --git a/tests/test_modeling_xlnet.py b/tests/test_modeling_xlnet.py index fccee2d6a4..e3701bdc44 100644 --- a/tests/test_modeling_xlnet.py +++ b/tests/test_modeling_xlnet.py @@ -39,6 +39,415 @@ if is_torch_available(): from transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_LIST +class XLNetModelTester: + def __init__( + self, + parent, + batch_size=14, + seq_length=7, + mem_len=10, + clamp_len=-1, + reuse_len=15, + is_training=True, + use_labels=True, + vocab_size=99, + cutoffs=[10, 50, 80], + hidden_size=32, + num_attention_heads=4, + d_inner=128, + num_hidden_layers=5, + type_sequence_label_size=2, + untie_r=True, + bi_data=False, + same_length=False, + initializer_range=0.05, + seed=1, + type_vocab_size=2, + bos_token_id=1, + eos_token_id=2, + pad_token_id=5, + num_choices=4, + ): + self.parent = parent + self.batch_size = 14 + self.seq_length = 7 + self.mem_len = 10 + # self.key_len = seq_length + mem_len + self.clamp_len = -1 + self.reuse_len = 15 + self.is_training = True + self.use_labels = True + self.vocab_size = 99 + self.cutoffs = [10, 50, 80] + self.hidden_size = 32 + self.num_attention_heads = 4 + self.d_inner = 128 + self.num_hidden_layers = 5 + self.type_sequence_label_size = 2 + self.untie_r = True + self.bi_data = False + self.same_length = False + self.initializer_range = 0.05 + self.seed = 1 + self.type_vocab_size = 2 + self.bos_token_id = 1 + self.eos_token_id = 2 + self.pad_token_id = 5 + self.num_choices = 4 + + def prepare_config_and_inputs(self): + input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float() + + input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size) + 
perm_mask = torch.zeros( + self.batch_size, self.seq_length + 1, self.seq_length + 1, dtype=torch.float, device=torch_device, + ) + perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token + target_mapping = torch.zeros(self.batch_size, 1, self.seq_length + 1, dtype=torch.float, device=torch_device,) + target_mapping[:, 0, -1] = 1.0 # predict last token + + sequence_labels = None + lm_labels = None + is_impossible_labels = None + token_labels = None + if self.use_labels: + lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + is_impossible_labels = ids_tensor([self.batch_size], 2).float() + token_labels = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + config = XLNetConfig( + vocab_size=self.vocab_size, + d_model=self.hidden_size, + n_head=self.num_attention_heads, + d_inner=self.d_inner, + n_layer=self.num_hidden_layers, + untie_r=self.untie_r, + mem_len=self.mem_len, + clamp_len=self.clamp_len, + same_length=self.same_length, + reuse_len=self.reuse_len, + bi_data=self.bi_data, + initializer_range=self.initializer_range, + num_labels=self.type_sequence_label_size, + bos_token_id=self.bos_token_id, + pad_token_id=self.pad_token_id, + eos_token_id=self.eos_token_id, + ) + + return ( + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + token_labels, + ) + + def set_seed(self): + random.seed(self.seed) + torch.manual_seed(self.seed) + + def create_and_check_xlnet_base_model( + self, + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + token_labels, + ): + model = XLNetModel(config) + model.to(torch_device) + model.eval() + + _, _ = model(input_ids_1, input_mask=input_mask) + _, _ = model(input_ids_1, attention_mask=input_mask) + _, _ = model(input_ids_1, token_type_ids=segment_ids) + outputs, mems_1 = model(input_ids_1) + + result = { + "mems_1": mems_1, + "outputs": outputs, + } + + config.mem_len = 0 + model = XLNetModel(config) + model.to(torch_device) + model.eval() + no_mems_outputs = model(input_ids_1) + self.parent.assertEqual(len(no_mems_outputs), 1) + + self.parent.assertListEqual( + list(result["outputs"].size()), [self.batch_size, self.seq_length, self.hidden_size], + ) + self.parent.assertListEqual( + list(list(mem.size()) for mem in result["mems_1"]), + [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def create_and_check_xlnet_base_model_with_att_output( + self, + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + token_labels, + ): + model = XLNetModel(config) + model.to(torch_device) + model.eval() + + _, _, attentions = model(input_ids_1, target_mapping=target_mapping, output_attentions=True) + + self.parent.assertEqual(len(attentions), config.n_layer) + self.parent.assertIsInstance(attentions[0], tuple) + self.parent.assertEqual(len(attentions[0]), 2) + self.parent.assertTrue(attentions[0][0].shape, attentions[0][0].shape) + + def create_and_check_xlnet_lm_head( + self, + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + 
token_labels, + ): + model = XLNetLMHeadModel(config) + model.to(torch_device) + model.eval() + + loss_1, all_logits_1, mems_1 = model(input_ids_1, token_type_ids=segment_ids, labels=lm_labels) + + loss_2, all_logits_2, mems_2 = model(input_ids_2, token_type_ids=segment_ids, labels=lm_labels, mems=mems_1) + + logits, _ = model(input_ids_q, perm_mask=perm_mask, target_mapping=target_mapping) + + result = { + "loss_1": loss_1, + "mems_1": mems_1, + "all_logits_1": all_logits_1, + "loss_2": loss_2, + "mems_2": mems_2, + "all_logits_2": all_logits_2, + } + + self.parent.assertListEqual(list(result["loss_1"].size()), []) + self.parent.assertListEqual( + list(result["all_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size], + ) + self.parent.assertListEqual( + list(list(mem.size()) for mem in result["mems_1"]), + [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + self.parent.assertListEqual(list(result["loss_2"].size()), []) + self.parent.assertListEqual( + list(result["all_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size], + ) + self.parent.assertListEqual( + list(list(mem.size()) for mem in result["mems_2"]), + [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def create_and_check_xlnet_qa( + self, + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + token_labels, + ): + model = XLNetForQuestionAnswering(config) + model.to(torch_device) + model.eval() + + outputs = model(input_ids_1) + (start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits, mems,) = outputs + + outputs = model( + input_ids_1, + start_positions=sequence_labels, + end_positions=sequence_labels, + cls_index=sequence_labels, + is_impossible=is_impossible_labels, + p_mask=input_mask, + ) + + outputs = model( + input_ids_1, + start_positions=sequence_labels, + end_positions=sequence_labels, + cls_index=sequence_labels, + is_impossible=is_impossible_labels, + ) + + total_loss, mems = outputs + + outputs = model(input_ids_1, start_positions=sequence_labels, end_positions=sequence_labels,) + + total_loss, mems = outputs + + result = { + "loss": total_loss, + "start_top_log_probs": start_top_log_probs, + "start_top_index": start_top_index, + "end_top_log_probs": end_top_log_probs, + "end_top_index": end_top_index, + "cls_logits": cls_logits, + "mems": mems, + } + + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual( + list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top], + ) + self.parent.assertListEqual( + list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top], + ) + self.parent.assertListEqual( + list(result["end_top_log_probs"].size()), + [self.batch_size, model.config.start_n_top * model.config.end_n_top], + ) + self.parent.assertListEqual( + list(result["end_top_index"].size()), [self.batch_size, model.config.start_n_top * model.config.end_n_top], + ) + self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size]) + self.parent.assertListEqual( + list(list(mem.size()) for mem in result["mems"]), + [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def create_and_check_xlnet_token_classif( + self, + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + 
lm_labels, + sequence_labels, + is_impossible_labels, + token_labels, + ): + model = XLNetForTokenClassification(config) + model.to(torch_device) + model.eval() + + logits, mems_1 = model(input_ids_1) + loss, logits, mems_1 = model(input_ids_1, labels=token_labels) + + result = { + "loss": loss, + "mems_1": mems_1, + "logits": logits, + } + + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual( + list(result["logits"].size()), [self.batch_size, self.seq_length, self.type_sequence_label_size], + ) + self.parent.assertListEqual( + list(list(mem.size()) for mem in result["mems_1"]), + [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def create_and_check_xlnet_sequence_classif( + self, + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + token_labels, + ): + model = XLNetForSequenceClassification(config) + model.to(torch_device) + model.eval() + + logits, mems_1 = model(input_ids_1) + loss, logits, mems_1 = model(input_ids_1, labels=sequence_labels) + + result = { + "loss": loss, + "mems_1": mems_1, + "logits": logits, + } + + self.parent.assertListEqual(list(result["loss"].size()), []) + self.parent.assertListEqual( + list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size], + ) + self.parent.assertListEqual( + list(list(mem.size()) for mem in result["mems_1"]), + [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, + ) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + token_labels, + ) = config_and_inputs + inputs_dict = {"input_ids": input_ids_1} + return config, inputs_dict + + @require_torch class XLNetModelTest(ModelTesterMixin, unittest.TestCase): @@ -59,421 +468,8 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase): ) # TODO (PVP): Check other models whether language generation is also applicable test_pruning = False - class XLNetModelTester(object): - def __init__( - self, - parent, - batch_size=14, - seq_length=7, - mem_len=10, - clamp_len=-1, - reuse_len=15, - is_training=True, - use_labels=True, - vocab_size=99, - cutoffs=[10, 50, 80], - hidden_size=32, - num_attention_heads=4, - d_inner=128, - num_hidden_layers=5, - type_sequence_label_size=2, - untie_r=True, - bi_data=False, - same_length=False, - initializer_range=0.05, - seed=1, - type_vocab_size=2, - bos_token_id=1, - eos_token_id=2, - pad_token_id=5, - num_choices=4, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.mem_len = mem_len - # self.key_len = seq_length + mem_len - self.clamp_len = clamp_len - self.reuse_len = reuse_len - self.is_training = is_training - self.use_labels = use_labels - self.vocab_size = vocab_size - self.cutoffs = cutoffs - self.hidden_size = hidden_size - self.num_attention_heads = num_attention_heads - self.d_inner = d_inner - self.num_hidden_layers = num_hidden_layers - self.bi_data = bi_data - self.untie_r = untie_r - self.same_length = same_length - self.initializer_range = initializer_range - self.seed = seed - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.bos_token_id = bos_token_id - self.pad_token_id = 
pad_token_id - self.eos_token_id = eos_token_id - self.num_choices = num_choices - - def prepare_config_and_inputs(self): - input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float() - - input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size) - perm_mask = torch.zeros( - self.batch_size, self.seq_length + 1, self.seq_length + 1, dtype=torch.float, device=torch_device, - ) - perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token - target_mapping = torch.zeros( - self.batch_size, 1, self.seq_length + 1, dtype=torch.float, device=torch_device, - ) - target_mapping[:, 0, -1] = 1.0 # predict last token - - sequence_labels = None - lm_labels = None - is_impossible_labels = None - token_labels = None - if self.use_labels: - lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - is_impossible_labels = ids_tensor([self.batch_size], 2).float() - token_labels = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - config = XLNetConfig( - vocab_size=self.vocab_size, - d_model=self.hidden_size, - n_head=self.num_attention_heads, - d_inner=self.d_inner, - n_layer=self.num_hidden_layers, - untie_r=self.untie_r, - mem_len=self.mem_len, - clamp_len=self.clamp_len, - same_length=self.same_length, - reuse_len=self.reuse_len, - bi_data=self.bi_data, - initializer_range=self.initializer_range, - num_labels=self.type_sequence_label_size, - bos_token_id=self.bos_token_id, - pad_token_id=self.pad_token_id, - eos_token_id=self.eos_token_id, - ) - - return ( - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - token_labels, - ) - - def set_seed(self): - random.seed(self.seed) - torch.manual_seed(self.seed) - - def create_and_check_xlnet_base_model( - self, - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - token_labels, - ): - model = XLNetModel(config) - model.to(torch_device) - model.eval() - - _, _ = model(input_ids_1, input_mask=input_mask) - _, _ = model(input_ids_1, attention_mask=input_mask) - _, _ = model(input_ids_1, token_type_ids=segment_ids) - outputs, mems_1 = model(input_ids_1) - - result = { - "mems_1": mems_1, - "outputs": outputs, - } - - config.mem_len = 0 - model = XLNetModel(config) - model.to(torch_device) - model.eval() - no_mems_outputs = model(input_ids_1) - self.parent.assertEqual(len(no_mems_outputs), 1) - - self.parent.assertListEqual( - list(result["outputs"].size()), [self.batch_size, self.seq_length, self.hidden_size], - ) - self.parent.assertListEqual( - list(list(mem.size()) for mem in result["mems_1"]), - [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def create_and_check_xlnet_base_model_with_att_output( - self, - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - token_labels, - ): - model = XLNetModel(config) - model.to(torch_device) - model.eval() - - _, _, attentions = 
model(input_ids_1, target_mapping=target_mapping, output_attentions=True) - - self.parent.assertEqual(len(attentions), config.n_layer) - self.parent.assertIsInstance(attentions[0], tuple) - self.parent.assertEqual(len(attentions[0]), 2) - self.parent.assertTrue(attentions[0][0].shape, attentions[0][0].shape) - - def create_and_check_xlnet_lm_head( - self, - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - token_labels, - ): - model = XLNetLMHeadModel(config) - model.to(torch_device) - model.eval() - - loss_1, all_logits_1, mems_1 = model(input_ids_1, token_type_ids=segment_ids, labels=lm_labels) - - loss_2, all_logits_2, mems_2 = model( - input_ids_2, token_type_ids=segment_ids, labels=lm_labels, mems=mems_1 - ) - - logits, _ = model(input_ids_q, perm_mask=perm_mask, target_mapping=target_mapping) - - result = { - "loss_1": loss_1, - "mems_1": mems_1, - "all_logits_1": all_logits_1, - "loss_2": loss_2, - "mems_2": mems_2, - "all_logits_2": all_logits_2, - } - - self.parent.assertListEqual(list(result["loss_1"].size()), []) - self.parent.assertListEqual( - list(result["all_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size], - ) - self.parent.assertListEqual( - list(list(mem.size()) for mem in result["mems_1"]), - [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - self.parent.assertListEqual(list(result["loss_2"].size()), []) - self.parent.assertListEqual( - list(result["all_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size], - ) - self.parent.assertListEqual( - list(list(mem.size()) for mem in result["mems_2"]), - [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def create_and_check_xlnet_qa( - self, - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - token_labels, - ): - model = XLNetForQuestionAnswering(config) - model.to(torch_device) - model.eval() - - outputs = model(input_ids_1) - (start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits, mems,) = outputs - - outputs = model( - input_ids_1, - start_positions=sequence_labels, - end_positions=sequence_labels, - cls_index=sequence_labels, - is_impossible=is_impossible_labels, - p_mask=input_mask, - ) - - outputs = model( - input_ids_1, - start_positions=sequence_labels, - end_positions=sequence_labels, - cls_index=sequence_labels, - is_impossible=is_impossible_labels, - ) - - total_loss, mems = outputs - - outputs = model(input_ids_1, start_positions=sequence_labels, end_positions=sequence_labels,) - - total_loss, mems = outputs - - result = { - "loss": total_loss, - "start_top_log_probs": start_top_log_probs, - "start_top_index": start_top_index, - "end_top_log_probs": end_top_log_probs, - "end_top_index": end_top_index, - "cls_logits": cls_logits, - "mems": mems, - } - - self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top], - ) - self.parent.assertListEqual( - list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top], - ) - self.parent.assertListEqual( - list(result["end_top_log_probs"].size()), - [self.batch_size, model.config.start_n_top * model.config.end_n_top], - ) - self.parent.assertListEqual( - 
list(result["end_top_index"].size()), - [self.batch_size, model.config.start_n_top * model.config.end_n_top], - ) - self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size]) - self.parent.assertListEqual( - list(list(mem.size()) for mem in result["mems"]), - [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def create_and_check_xlnet_token_classif( - self, - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - token_labels, - ): - model = XLNetForTokenClassification(config) - model.to(torch_device) - model.eval() - - logits, mems_1 = model(input_ids_1) - loss, logits, mems_1 = model(input_ids_1, labels=token_labels) - - result = { - "loss": loss, - "mems_1": mems_1, - "logits": logits, - } - - self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["logits"].size()), [self.batch_size, self.seq_length, self.type_sequence_label_size], - ) - self.parent.assertListEqual( - list(list(mem.size()) for mem in result["mems_1"]), - [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def create_and_check_xlnet_sequence_classif( - self, - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - token_labels, - ): - model = XLNetForSequenceClassification(config) - model.to(torch_device) - model.eval() - - logits, mems_1 = model(input_ids_1) - loss, logits, mems_1 = model(input_ids_1, labels=sequence_labels) - - result = { - "loss": loss, - "mems_1": mems_1, - "logits": logits, - } - - self.parent.assertListEqual(list(result["loss"].size()), []) - self.parent.assertListEqual( - list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size], - ) - self.parent.assertListEqual( - list(list(mem.size()) for mem in result["mems_1"]), - [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers, - ) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - token_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids_1} - return config, inputs_dict - def setUp(self): - self.model_tester = XLNetModelTest.XLNetModelTester(self) + self.model_tester = XLNetModelTester(self) self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37) def test_config(self):