Fix failing DeepSpeed model zoo tests (#30112)

* fix sequence length errors

* fix label column name error for vit

* fix the lm_head embedding!=linear layer mismatches for Seq2Seq models
This commit is contained in:
Sourab Mangrulkar 2024-04-09 12:01:47 +05:30 committed by GitHub
parent 2f12e40822
commit 4e3490f79b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 7 additions and 1 deletions

View File

@ -1932,7 +1932,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
# if word embeddings are not tied, make sure that lm head is resized as well
if self.get_output_embeddings() is not None and not self.config.tie_word_embeddings:
old_lm_head = self.get_output_embeddings()
new_lm_head = self._get_resized_lm_head(old_lm_head, new_num_tokens)
if isinstance(old_lm_head, torch.nn.Embedding):
new_lm_head = self._get_resized_embeddings(old_lm_head, new_num_tokens)
else:
new_lm_head = self._get_resized_lm_head(old_lm_head, new_num_tokens)
if hasattr(old_lm_head, "_hf_hook"):
hook = old_lm_head._hf_hook
add_hook_to_module(new_lm_head, hook)

View File

@ -236,6 +236,8 @@ def make_task_cmds():
--train_file {data_dir_wmt}/train.json
--source_lang en
--target_lang ro
--max_source_length 12
--max_target_length 12
""",
"sum": f"""
{scripts_dir}/summarization/run_summarization.py
@ -269,6 +271,7 @@ def make_task_cmds():
--remove_unused_columns False
--max_steps 10
--image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json
--label_column_name labels
""",
}