From fda2f623953bfe2290cd65429eb008f02ebdb152 Mon Sep 17 00:00:00 2001
From: Catalin Voss
Date: Sun, 24 Mar 2019 14:37:13 -0700
Subject: [PATCH] Fix test failures due to old torch issue with non-contiguous
 view

---
 pytorch_pretrained_bert/modeling_gpt2.py   | 8 ++++----
 pytorch_pretrained_bert/modeling_openai.py | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/pytorch_pretrained_bert/modeling_gpt2.py b/pytorch_pretrained_bert/modeling_gpt2.py
index 635326b408..7a0bb4db53 100644
--- a/pytorch_pretrained_bert/modeling_gpt2.py
+++ b/pytorch_pretrained_bert/modeling_gpt2.py
@@ -618,8 +618,8 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
         lm_logits = self.lm_head(hidden_states)
         if lm_labels is not None:
             # Shift so that tokens < n predict n
-            shift_logits = lm_logits[:, :-1]
-            shift_labels = lm_labels[:, 1:]
+            shift_logits = lm_logits[:, :-1].contiguous()
+            shift_labels = lm_labels[:, 1:].contiguous()
 
             # In tensorflow, it's [batch, d_0, d_1, ..., d_{r-1}, num_classes]
             # in pytorch, it's [batch, num_classes, d_0, d_1, ..., d_{r-1}]
@@ -698,8 +698,8 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
         mc_logits = self.multiple_choice_head(hidden_states, mc_token_ids)
         losses = []
         if lm_labels is not None:
-            shift_logits = lm_logits[:, :-1]
-            shift_labels = lm_labels[:, 1:]
+            shift_logits = lm_logits[:, :-1].contiguous()
+            shift_labels = lm_labels[:, 1:].contiguous()
             loss_fct = CrossEntropyLoss(ignore_index=-1)
             losses.append(loss_fct(shift_logits.view(-1, shift_logits.size(-1)),
                                    shift_labels.view(-1)))
diff --git a/pytorch_pretrained_bert/modeling_openai.py b/pytorch_pretrained_bert/modeling_openai.py
index 8c1dd5e4a3..4385c1eaa7 100644
--- a/pytorch_pretrained_bert/modeling_openai.py
+++ b/pytorch_pretrained_bert/modeling_openai.py
@@ -717,8 +717,8 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
         lm_logits = self.lm_head(hidden_states)
         if lm_labels is not None:
             # Shift so that tokens < n predict n
-            shift_logits = lm_logits[:, :-1]
-            shift_labels = lm_labels[:, 1:]
+            shift_logits = lm_logits[:, :-1].contiguous()
+            shift_labels = lm_labels[:, 1:].contiguous()
 
             # In tensorflow, it's [batch, d_0, d_1, ..., d_{r-1}, num_classes]
             # in pytorch, it's [batch, num_classes, d_0, d_1, ..., d_{r-1}]
@@ -811,8 +811,8 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
         mc_logits = self.multiple_choice_head(hidden_states, mc_token_ids)
         losses = []
         if lm_labels is not None:
-            shift_logits = lm_logits[:, :-1]
-            shift_labels = lm_labels[:, 1:]
+            shift_logits = lm_logits[:, :-1].contiguous()
+            shift_labels = lm_labels[:, 1:].contiguous()
             loss_fct = CrossEntropyLoss(ignore_index=-1)
             losses.append(loss_fct(shift_logits.view(-1, shift_logits.size(-1)),
                                    shift_labels.view(-1)))
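
A minimal standalone repro of the underlying issue (illustrative only, not
part of the patch): slicing lm_logits along the sequence dimension shares
storage with the original tensor and leaves it non-contiguous, and .view()
raises a RuntimeError on non-contiguous input on the torch versions this
repo targets. Calling .contiguous() first materializes a dense copy that
.view() accepts. The shapes below (batch=2, seq=4, vocab=8) are made up for
the example and are not taken from the models.

    import torch

    lm_logits = torch.randn(2, 4, 8)         # [batch, seq, vocab]
    shift_logits = lm_logits[:, :-1]         # drop last position; strides unchanged
    print(shift_logits.is_contiguous())      # False: dim-0 stride still spans seq=4

    try:
        shift_logits.view(-1, shift_logits.size(-1))
    except RuntimeError as err:
        print(err)                           # view() rejects the non-contiguous layout

    # The fix applied in the patch: copy to a dense layout before flattening
    # the tokens for the cross-entropy loss.
    flat = shift_logits.contiguous().view(-1, shift_logits.size(-1))
    print(flat.shape)                        # torch.Size([6, 8])

On torch builds that provide .reshape(), shift_logits.reshape(-1,
shift_logits.size(-1)) would copy only when needed; .contiguous().view() is
the idiom that stays compatible with older releases.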