# transformers.js/scripts/convert.py

import os
import shutil
from dataclasses import dataclass, field
from typing import Optional
from pathlib import Path
from transformers import AutoTokenizer, HfArgumentParser
from transformers.utils import cached_file
from optimum.utils import DEFAULT_DUMMY_SHAPES
from optimum.exporters.tasks import TasksManager
from optimum.exporters.onnx.utils import (
get_decoder_models_for_export,
get_encoder_decoder_models_for_export
)
from optimum.exporters.onnx.convert import (
export,
export_models
)
from optimum.onnx.graph_transformations import merge_decoders
from optimum.onnxruntime.utils import (
ONNX_WEIGHTS_NAME,
ONNX_ENCODER_NAME,
ONNX_DECODER_NAME,
ONNX_DECODER_WITH_PAST_NAME,
ONNX_DECODER_MERGED_NAME
)
from onnxruntime.quantization import quantize_dynamic, QuantType
from tqdm import tqdm
@dataclass
class ConversionArguments:
"""
Arguments used for converting HuggingFace models to onnx.
"""
model_id: str = field(
metadata={
"help": "Model identifier"
}
)
quantize: bool = field(
default=False,
metadata={
"help": "Whether to quantize the model."
}
)
input_parent_dir: str = field(
default='./models/pytorch/',
metadata={
"help": "Path where the original model will be loaded from."
}
)
output_parent_dir: str = field(
default='./models/onnx/',
metadata={
"help": "Path where the converted model will be saved to."
}
)
task: Optional[str] = field(
default='default',
metadata={
"help": (
"The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among:"
f" {str(list(TasksManager._TASKS_TO_AUTOMODELS.keys()))}. For decoder models, use `xxx-with-past` to export the model using past key values in the decoder."
)
}
)
opset: int = field(
default=None,
metadata={
"help": (
"If specified, ONNX opset version to export the model with. Otherwise, the default opset will be used."
)
}
)
device: str = field(
default='cpu',
metadata={
"help": 'The device to use to do the export. Defaults to "cpu".'
}
)
from_hub: bool = field(
default=False,
metadata={
"help": "Whether to use local files, or from the HuggingFace Hub."
}
)
merge_decoders: bool = field(
default=True,
metadata={
"help": "Whether to fuse decoder ONNX model and decoder with past ONNX model into one ONNX model with if logic"
}
)
overwrite: bool = field(
default=False,
metadata={
"help": "Whether to overwriting existing models"
}
)
UNSIGNED_MODEL_TYPES = [
'whisper'
]
def quantize(models_name_or_path, model_type):
    """
    Quantize the weights of the model from float32 to int8 to allow very efficient inference on modern CPUs.

    Uses unsigned ints for activation values and signed ints for weights (by default), per
    https://onnxruntime.ai/docs/performance/quantization.html#data-type-selection,
    as this is faster on most CPU architectures.

    Args:
        models_name_or_path: Paths of the exported ONNX models to quantize.
        model_type: The model type (e.g., 'whisper'), used to choose the weight data type.

    Returns:
        None. The models are quantized in place, overwriting the original files.
    """
# As per docs, signed weight type (QInt8) is faster on most CPUs
# However, for some model types (e.g., whisper), we have to use
# unsigned weight type (QUInt8). For more info:
# https://github.com/microsoft/onnxruntime/issues/3130#issuecomment-1105200621
if model_type in UNSIGNED_MODEL_TYPES:
weight_type = QuantType.QUInt8
else:
# Default
weight_type = QuantType.QInt8
for model in tqdm(models_name_or_path, desc='Quantizing'):
# model_name = os.path.splitext(os.path.basename(model))[0]
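        # Note: quantize_dynamic writes the quantized graph back to the same path
        # (model_input == model_output), so the float32 model file is overwritten in place.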
quantize_dynamic(
model_input=model,
model_output=model,
per_channel=True,
            reduce_range=True,  # should have the same value as per_channel
activation_type=QuantType.QUInt8,
weight_type=weight_type,
optimize_model=False,
) # op_types_to_quantize=['MatMul', 'Relu', 'Add', 'Mul' ],
def copy_if_exists(model_path, file_name, destination):
file = cached_file(model_path, file_name,
_raise_exceptions_for_missing_entries=False)
if file is not None:
shutil.copy(file, destination)
def main():
    # Helper script to fix inconsistencies between the optimum exporter and other exporters.
    # T5 uses a similar approach to fastT5 (https://github.com/Ki6an/fastT5).
parser = HfArgumentParser(
(ConversionArguments, )
)
conv_args, = parser.parse_args_into_dataclasses()
input_model_path = os.path.join(
conv_args.input_parent_dir,
conv_args.model_id
)
if conv_args.from_hub:
model_path = conv_args.model_id
else:
model_path = input_model_path
# Infer the task
task = conv_args.task
if task == "auto":
try:
task = TasksManager.infer_task_from_model(model_path)
except KeyError as e:
raise KeyError(
f"The task could not be automatically inferred. Please provide the argument --task with the task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
)
output_model_folder = os.path.join(
conv_args.output_parent_dir,
'quantized' if conv_args.quantize else 'unquantized',
conv_args.model_id,
task
)
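    # With the defaults, this resolves to e.g. ./models/onnx/quantized/<model_id>/<task>
    # (or .../unquantized/... when --quantize is not passed).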
# get the shapes to be used to generate dummy inputs
input_shapes = DEFAULT_DUMMY_SHAPES.copy()
model = TasksManager.get_model_from_task(
task, model_path,
)
onnx_config_constructor = TasksManager.get_exporter_config_constructor(
model=model, exporter='onnx', task=task)
onnx_config = onnx_config_constructor(model.config)
# Ensure the requested opset is sufficient
if conv_args.opset is None:
conv_args.opset = onnx_config.DEFAULT_ONNX_OPSET
elif conv_args.opset < onnx_config.DEFAULT_ONNX_OPSET:
raise ValueError(
f"Opset {conv_args.opset} is not sufficient to export {model.config.model_type}. "
f"At least {onnx_config.DEFAULT_ONNX_OPSET} is required."
)
# Create output folder
os.makedirs(output_model_folder, exist_ok=True)
# Copy certain JSON files, which save_pretrained doesn't handle
copy_if_exists(model_path, 'tokenizer.json', output_model_folder)
copy_if_exists(model_path, 'preprocessor_config.json', output_model_folder)
# Saving the model config
model.config.save_pretrained(output_model_folder)
# Save tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.save_pretrained(output_model_folder)
# Specify output paths
OUTPUT_WEIGHTS_PATH = os.path.join(output_model_folder, ONNX_WEIGHTS_NAME)
OUTPUT_ENCODER_PATH = os.path.join(output_model_folder, ONNX_ENCODER_NAME)
OUTPUT_DECODER_PATH = os.path.join(output_model_folder, ONNX_DECODER_NAME)
OUTPUT_DECODER_WITH_PAST_PATH = os.path.join(
output_model_folder, ONNX_DECODER_WITH_PAST_NAME)
OUTPUT_DECODER_MERGED_PATH = os.path.join(
output_model_folder, ONNX_DECODER_MERGED_NAME)
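    # Single-graph models are exported to ONNX_WEIGHTS_NAME; encoder-decoder and causal-lm
    # models are split into encoder/decoder(-with-past) graphs, which may be merged below.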
onnx_model_paths = []
# Step 1. convert huggingface model to onnx
if model.config.is_encoder_decoder or task.startswith("causal-lm"):
if model.config.is_encoder_decoder and task.startswith("causal-lm"):
raise ValueError(
f"model.config.is_encoder_decoder is True and task is `{task}`, which are incompatible. If the task was auto-inferred, please fill a bug report"
f"at https://github.com/huggingface/optimum, if --task was explicitely passed, make sure you selected the right task for the model,"
f" referring to `optimum.exporters.tasks.TaskManager`'s `_TASKS_TO_AUTOMODELS`."
)
if model.config.is_encoder_decoder:
models_and_onnx_configs = get_encoder_decoder_models_for_export(
model,
onnx_config
)
else:
models_and_onnx_configs = get_decoder_models_for_export(
model,
onnx_config
)
onnx_model_paths = [
os.path.join(output_model_folder, f'{x}.onnx')
for x in models_and_onnx_configs
]
# Check if at least one model doesn't exist, or user requests to overwrite
if any(
not os.path.exists(x) for x in onnx_model_paths
) or conv_args.overwrite:
export_models(
models_and_onnx_configs=models_and_onnx_configs,
opset=conv_args.opset,
output_dir=output_model_folder,
input_shapes=input_shapes,
device=conv_args.device,
)
else:
output_path = Path(OUTPUT_WEIGHTS_PATH)
# Check if model doesn't exist, or user requests to overwrite
if not os.path.exists(output_path) or conv_args.overwrite:
export(
model=model,
config=onnx_config,
output=output_path,
opset=conv_args.opset,
input_shapes=input_shapes,
device=conv_args.device,
)
onnx_model_paths.append(output_path)
# Step 2. (optional, recommended) quantize the converted model for fast inference and to reduce model size.
if conv_args.quantize:
quantize(onnx_model_paths, model.config.model_type)
# Step 3. merge decoders.
if conv_args.merge_decoders and (
os.path.exists(OUTPUT_DECODER_PATH) and os.path.exists(
OUTPUT_DECODER_WITH_PAST_PATH)
) and (not os.path.exists(OUTPUT_DECODER_MERGED_PATH) or conv_args.overwrite):
print('Merging decoders')
merge_decoders(
OUTPUT_DECODER_PATH,
OUTPUT_DECODER_WITH_PAST_PATH,
save_path=OUTPUT_DECODER_MERGED_PATH
)
if __name__ == '__main__':
main()
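
# Example invocation (the model id below is only illustrative; any supported local or Hub model works):
#   python convert.py --model_id bert-base-uncased --task default --from_hub --quantize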