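"""
Convert HuggingFace (PyTorch) models to ONNX with the optimum exporters, with
optional dynamic int8 quantization and decoder merging.

Example invocation (the script filename here is assumed):

    python convert.py --model_id t5-small --from_hub --quantize

Run with --help for the full set of options defined by ConversionArguments.
"""
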
import os
import shutil

from dataclasses import dataclass, field
from typing import Optional
from pathlib import Path

from transformers import AutoTokenizer, HfArgumentParser
from transformers.utils import cached_file

from optimum.utils import DEFAULT_DUMMY_SHAPES
from optimum.exporters.tasks import TasksManager
from optimum.exporters.onnx.utils import (
    get_decoder_models_for_export,
    get_encoder_decoder_models_for_export
)
from optimum.exporters.onnx.convert import (
    export,
    export_models
)

from optimum.onnx.graph_transformations import merge_decoders
from optimum.onnxruntime.utils import (
    ONNX_WEIGHTS_NAME,
    ONNX_ENCODER_NAME,
    ONNX_DECODER_NAME,
    ONNX_DECODER_WITH_PAST_NAME,
    ONNX_DECODER_MERGED_NAME
)

from onnxruntime.quantization import quantize_dynamic, QuantType
from tqdm import tqdm


@dataclass
class ConversionArguments:
    """
    Arguments used for converting HuggingFace models to ONNX.
    """
    model_id: str = field(
        metadata={
            "help": "Model identifier"
        }
    )
    quantize: bool = field(
        default=False,
        metadata={
            "help": "Whether to quantize the model."
        }
    )
    input_parent_dir: str = field(
        default='./models/pytorch/',
        metadata={
            "help": "Path where the original model will be loaded from."
        }
    )
    output_parent_dir: str = field(
        default='./models/onnx/',
        metadata={
            "help": "Path where the converted model will be saved to."
        }
    )
    task: Optional[str] = field(
        default='default',
        metadata={
            "help": (
                "The task to export the model for. Use 'auto' to infer the task from the model. Available tasks depend on the model, but are among: "
                f"{str(list(TasksManager._TASKS_TO_AUTOMODELS.keys()))}. For decoder models, use `xxx-with-past` to export the model using past key values in the decoder."
            )
        }
    )
    opset: Optional[int] = field(
        default=None,
        metadata={
            "help": "If specified, the ONNX opset version to export the model with. Otherwise, the default opset is used."
        }
    )
    device: str = field(
        default='cpu',
        metadata={
            "help": 'The device to use to do the export. Defaults to "cpu".'
        }
    )
    from_hub: bool = field(
        default=False,
        metadata={
            "help": "Whether to load the model from the HuggingFace Hub instead of a local directory."
        }
    )
    merge_decoders: bool = field(
        default=True,
        metadata={
            "help": "Whether to fuse the decoder ONNX model and the decoder-with-past ONNX model into a single ONNX model with If logic."
        }
    )
    overwrite: bool = field(
        default=False,
        metadata={
            "help": "Whether to overwrite existing models."
        }
    )
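

# Model types whose weights must be quantized with unsigned int8 (QUInt8)
# rather than the default signed int8; see the note in quantize() below.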
UNSIGNED_MODEL_TYPES = [
    'whisper',
    'vision-encoder-decoder',
    'vit',
]


def quantize(models_name_or_path, model_type):
    """
    Quantize the weights of the model from float32 to int8 to allow very
    efficient inference on modern CPUs.

    Uses unsigned ints for activation values and signed ints for weights (per
    https://onnxruntime.ai/docs/performance/quantization.html#data-type-selection),
    which is faster on most CPU architectures.

    Args:
        models_name_or_path: Paths of the exported ONNX models to quantize.
        model_type: The model type (e.g., 'whisper'), used to select the weight type.
    """
    # As per the docs, the signed weight type (QInt8) is faster on most CPUs.
    # However, for some model types (e.g., whisper), we have to use the
    # unsigned weight type (QUInt8). For more info:
    # https://github.com/microsoft/onnxruntime/issues/3130#issuecomment-1105200621
    if model_type in UNSIGNED_MODEL_TYPES:
        weight_type = QuantType.QUInt8
    else:
        # Default
        weight_type = QuantType.QInt8

    for model in tqdm(models_name_or_path, desc='Quantizing'):
        quantize_dynamic(
            model_input=model,
            model_output=model,  # quantize in place, overwriting the float32 model
            per_channel=True,
            reduce_range=True,  # should be the same as per_channel
            weight_type=weight_type,
            optimize_model=False,
        )  # op_types_to_quantize=['MatMul', 'Relu', 'Add', 'Mul'],
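
# Example (illustrative path): quantize(['./models/onnx/quantized/t5-small/default/decoder_model.onnx'], 't5')
# would rewrite the listed file in place with int8 weights.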


def copy_if_exists(model_path, file_name, destination):
    """
    Copy `file_name` to `destination` if it exists for the model at `model_path`.

    `cached_file` resolves the file from a local directory or the Hub cache, and
    returns None (rather than raising) when the file is missing.
    """
    file = cached_file(model_path, file_name,
                       _raise_exceptions_for_missing_entries=False)
    if file is not None:
        shutil.copy(file, destination)


def main():
    # Helper script to fix inconsistencies between the optimum exporter and other exporters.
    # T5 uses a similar approach to fastT5 (https://github.com/Ki6an/fastT5).
    parser = HfArgumentParser(
        (ConversionArguments, )
    )
    conv_args, = parser.parse_args_into_dataclasses()

    input_model_path = os.path.join(
        conv_args.input_parent_dir,
        conv_args.model_id
    )

    if conv_args.from_hub:
        model_path = conv_args.model_id
    else:
        model_path = input_model_path

    # Infer the task
    task = conv_args.task
    if task == 'auto':
        try:
            task = TasksManager.infer_task_from_model(model_path)
        except KeyError as e:
            raise KeyError(
                f"The task could not be automatically inferred. Please provide the argument --task with the task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
            )

    output_model_folder = os.path.join(
        conv_args.output_parent_dir,
        'quantized' if conv_args.quantize else 'unquantized',
        conv_args.model_id,
        task
    )

    # Get the shapes to be used to generate dummy inputs
    input_shapes = DEFAULT_DUMMY_SHAPES.copy()

    model = TasksManager.get_model_from_task(
        task, model_path,
    )

    onnx_config_constructor = TasksManager.get_exporter_config_constructor(
        model=model, exporter='onnx', task=task)
    onnx_config = onnx_config_constructor(model.config)

    # Ensure the requested opset is sufficient
    if conv_args.opset is None:
        conv_args.opset = onnx_config.DEFAULT_ONNX_OPSET
    elif conv_args.opset < onnx_config.DEFAULT_ONNX_OPSET:
        raise ValueError(
            f"Opset {conv_args.opset} is not sufficient to export {model.config.model_type}. "
            f"At least {onnx_config.DEFAULT_ONNX_OPSET} is required."
        )

    # Create output folder
    os.makedirs(output_model_folder, exist_ok=True)

    # Copy certain JSON files, which save_pretrained doesn't handle
    copy_if_exists(model_path, 'tokenizer.json', output_model_folder)
    copy_if_exists(model_path, 'preprocessor_config.json', output_model_folder)
    if model.can_generate():
        copy_if_exists(model_path, 'generation_config.json', output_model_folder)

    # Save the model config
    model.config.save_pretrained(output_model_folder)

    try:
        # Save tokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        tokenizer.save_pretrained(output_model_folder)
    except KeyError:
        pass  # No tokenizer

    # Specify output paths
    OUTPUT_WEIGHTS_PATH = os.path.join(output_model_folder, ONNX_WEIGHTS_NAME)
    OUTPUT_ENCODER_PATH = os.path.join(output_model_folder, ONNX_ENCODER_NAME)
    OUTPUT_DECODER_PATH = os.path.join(output_model_folder, ONNX_DECODER_NAME)
    OUTPUT_DECODER_WITH_PAST_PATH = os.path.join(
        output_model_folder, ONNX_DECODER_WITH_PAST_NAME)
    OUTPUT_DECODER_MERGED_PATH = os.path.join(
        output_model_folder, ONNX_DECODER_MERGED_NAME)
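
    # Encoder-decoder models and causal LMs are exported as one ONNX file per
    # sub-model (e.g., encoder, decoder, decoder with past); everything else is
    # exported as a single model.onnx.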

    onnx_model_paths = []

    # Step 1. Convert the HuggingFace model to ONNX
    if model.config.is_encoder_decoder or task.startswith('causal-lm'):
        if model.config.is_encoder_decoder and task.startswith('causal-lm'):
            raise ValueError(
                f"model.config.is_encoder_decoder is True and task is `{task}`, which are incompatible. If the task was auto-inferred, please file a bug report "
                f"at https://github.com/huggingface/optimum; if --task was explicitly passed, make sure you selected the right task for the model, "
                f"referring to `optimum.exporters.tasks.TasksManager`'s `_TASKS_TO_AUTOMODELS`."
            )

        if model.config.is_encoder_decoder:
            models_and_onnx_configs = get_encoder_decoder_models_for_export(
                model,
                onnx_config
            )
        else:
            models_and_onnx_configs = get_decoder_models_for_export(
                model,
                onnx_config
            )

        onnx_model_paths = [
            os.path.join(output_model_folder, f'{x}.onnx')
            for x in models_and_onnx_configs
        ]

        # Export if at least one model doesn't exist, or the user requests an overwrite
        if any(
            not os.path.exists(x) for x in onnx_model_paths
        ) or conv_args.overwrite:
            export_models(
                models_and_onnx_configs=models_and_onnx_configs,
                opset=conv_args.opset,
                output_dir=output_model_folder,
                input_shapes=input_shapes,
                device=conv_args.device,
            )
    else:
        output_path = Path(OUTPUT_WEIGHTS_PATH)

        # Export if the model doesn't exist, or the user requests an overwrite
        if not os.path.exists(output_path) or conv_args.overwrite:
            export(
                model=model,
                config=onnx_config,
                output=output_path,
                opset=conv_args.opset,
                input_shapes=input_shapes,
                device=conv_args.device,
            )

        onnx_model_paths.append(output_path)

    # Step 2. (Optional, recommended) Quantize the converted model for fast
    # inference and to reduce model size.
    if conv_args.quantize:
        quantize(onnx_model_paths, model.config.model_type)

    # Step 3. Merge decoders.
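    # The merged model wraps the decoder and decoder-with-past graphs behind an
    # ONNX `If` node, so a single file serves both the first generation step and
    # subsequent steps that reuse past key values.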
    if conv_args.merge_decoders and (
        os.path.exists(OUTPUT_DECODER_PATH) and os.path.exists(
            OUTPUT_DECODER_WITH_PAST_PATH)
    ) and (not os.path.exists(OUTPUT_DECODER_MERGED_PATH) or conv_args.overwrite):
        print('Merging decoders')
        merge_decoders(
            OUTPUT_DECODER_PATH,
            OUTPUT_DECODER_WITH_PAST_PATH,
            save_path=OUTPUT_DECODER_MERGED_PATH
        )


if __name__ == '__main__':
    main()