diff --git a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
index c6c3be62c4..c8a069784d 100644
--- a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
+++ b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
@@ -191,20 +191,21 @@ def convert_bloom_checkpoint_to_pytorch(
                     tensors[key] = tensors[key] / pretraining_tp
             other_keys = model.load_state_dict(tensors, strict=False)
-            assert not other_keys.unexpected_keys
+            assert not other_keys.unexpected_keys, f"The keys {other_keys.unexpected_keys} are unexpected"
             if missing_keys is None:
                 missing_keys = set(other_keys.missing_keys)
             else:
                 missing_keys = missing_keys.intersection(set(other_keys.missing_keys))
-        assert not missing_keys
+        assert not missing_keys, f"The keys {missing_keys} are missing"
 
         # Save pytorch-model
         os.makedirs(pytorch_dump_folder_path, exist_ok=True)
         pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME
         pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME
         print(f"Save PyTorch model to {pytorch_weights_dump_path} with dtype {config.torch_dtype}")
-        model = model.to(config.torch_dtype)
+        if config.torch_dtype is not None:
+            model = model.to(config.torch_dtype)
         torch.save(model.state_dict(), pytorch_weights_dump_path)
         print(f"Save configuration file to {pytorch_config_dump_path}")
         with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
diff --git a/src/transformers/models/gpt2/CONVERSION.md b/src/transformers/models/gpt2/CONVERSION.md
new file mode 100644
index 0000000000..d42ea1db9c
--- /dev/null
+++ b/src/transformers/models/gpt2/CONVERSION.md
@@ -0,0 +1,9 @@
+Here is how to convert a GPT2 model generated outside of `transformers`:
+
+* [Megatron-LM](https://github.com/NVIDIA/Megatron-LM)-generated model:
+
+Use [convert_megatron_gpt2_checkpoint.py](../megatron_gpt2/convert_megatron_gpt2_checkpoint.py)
+
+* [big-science fork of Megatron-DeepSpeed](https://github.com/bigscience-workshop/Megatron-DeepSpeed/)-generated model:
+
+Use the instructions [here](https://github.com/bigscience-workshop/bigscience/tree/aa872e754106f6678e8a9dac8c6962404ba39a6d/train/tr1-13B-base#checkpoint-conversion-and-upload). This approach uses a set of scripts that require the use of this particular fork of Megatron-DeepSpeed.
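
For context on the first hunk: `nn.Module.load_state_dict(..., strict=False)` returns a named tuple with `missing_keys` and `unexpected_keys` fields instead of raising on a mismatch, which is what the new assert messages surface. A minimal self-contained sketch of that behavior (a toy `nn.Linear`, not the conversion script itself):

```python
import torch
from torch import nn

model = nn.Linear(4, 2)

# A deliberately mismatched state dict: 'bias' is absent, 'stray' is extra.
partial = {"weight": torch.zeros(2, 4), "stray": torch.zeros(1)}

# With strict=False, load_state_dict reports the mismatches instead of raising.
result = model.load_state_dict(partial, strict=False)
print(result.missing_keys)     # ['bias']
print(result.unexpected_keys)  # ['stray']
```

Each checkpoint file loaded in the loop covers only part of the model, so the script intersects `missing_keys` across files: a key counts as missing only if no file provided it, and the final assert now reports exactly which keys those are.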
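
On the second hunk: `config.torch_dtype` can be `None` when the config does not pin a dtype, so the cast is now guarded rather than applied unconditionally. A hedged sketch of the resulting save path, where `save_converted` is a hypothetical helper written for illustration, not a function in the script:

```python
import os
import torch

def save_converted(model, torch_dtype, dump_folder, weights_name="pytorch_model.bin"):
    """Hypothetical helper mirroring the guarded save path in the diff."""
    os.makedirs(dump_folder, exist_ok=True)
    if torch_dtype is not None:
        # Only cast when a dtype is actually specified; otherwise the model
        # keeps the dtype it was built with (float32 by default).
        model = model.to(torch_dtype)
    torch.save(model.state_dict(), os.path.join(dump_folder, weights_name))

# Example: save in half precision, or pass None to keep the default dtype.
save_converted(torch.nn.Linear(4, 2), torch.float16, "converted-model")
```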