optimum/amd/ryzenai/configuration.py (85 lines of code) (raw):
# Copyright 2023 The HuggingFace Team. All rights reserved.
# Licensed under the MIT License.
"""Configuration classes for quantization with RyzenAI."""
from dataclasses import asdict, dataclass
from enum import Enum
from typing import Optional, Union

import vai_q_onnx
from onnxruntime.quantization import CalibrationMethod, QuantFormat, QuantType

from optimum.configuration_utils import BaseConfig
@dataclass
class QuantizationConfig:
    """
    QuantizationConfig is the configuration class handling all the RyzenAI quantization parameters.

    Args:
        format (`QuantFormat`, defaults to `QuantFormat.QDQ`):
            Targeted RyzenAI quantization representation format.
            For the Operator Oriented (QOperator) format, all the quantized operators have their own ONNX definitions.
            For the Tensor Oriented (QDQ) format, the model is quantized by inserting QuantizeLinear / DeQuantizeLinear
            operators.
        calibration_method (`CalibrationMethod` or `vai_q_onnx.PowerOfTwoMethod`, defaults to `vai_q_onnx.PowerOfTwoMethod.MinMSE`):
            The method chosen to calculate the activations quantization parameters using the calibration dataset.
        activations_dtype (`QuantType`, defaults to `QuantType.QUInt8`):
            The quantization data types to use for the activations.
        activations_symmetric (`bool`, defaults to `True`):
            Whether to apply symmetric quantization on the activations.
        weights_dtype (`QuantType`, defaults to `QuantType.QInt8`):
            The quantization data types to use for the weights.
        weights_symmetric (`bool`, defaults to `True`):
            Whether to apply symmetric quantization on the weights.
        enable_dpu (`bool`, defaults to `True`):
            Determines whether to generate a quantized model that is suitable for the DPU. If set to True, the quantization
            process will create a model that is optimized for DPU computations.
    """

    format: QuantFormat = QuantFormat.QDQ
    # The default comes from `vai_q_onnx.PowerOfTwoMethod`, while the CPU preset in this
    # file passes a `CalibrationMethod` member, so both families are accepted here.
    calibration_method: Union[CalibrationMethod, vai_q_onnx.PowerOfTwoMethod] = vai_q_onnx.PowerOfTwoMethod.MinMSE
    activations_dtype: QuantType = QuantType.QUInt8
    activations_symmetric: bool = True
    weights_dtype: QuantType = QuantType.QInt8
    weights_symmetric: bool = True
    enable_dpu: bool = True

    @staticmethod
    def quantization_type_str(activations_dtype: QuantType, weights_dtype: QuantType) -> str:
        """
        Build a short `<activations>/<weights>` tag describing the quantization schema.

        Each side is rendered as `s8` when the dtype equals `QuantType.QInt8` and as `u8` for any other value.

        Args:
            activations_dtype (`QuantType`):
                The quantization data type used for the activations.
            weights_dtype (`QuantType`):
                The quantization data type used for the weights.

        Returns:
            `str`: The schema tag, e.g. `u8/s8`.
        """
        return (
            f"{'s8' if activations_dtype == QuantType.QInt8 else 'u8'}"
            f"/"
            f"{'s8' if weights_dtype == QuantType.QInt8 else 'u8'}"
        )

    @property
    def use_symmetric_calibration(self) -> bool:
        """Whether both the activations and the weights are quantized symmetrically."""
        return self.activations_symmetric and self.weights_symmetric

    def __str__(self) -> str:
        """Return a one-line summary made of the format, the dtype schema tag and the `enable_dpu` flag."""
        return (
            f"{self.format} ("
            f"schema: {QuantizationConfig.quantization_type_str(self.activations_dtype, self.weights_dtype)}, "
            f"enable_dpu: {self.enable_dpu})"
        )
class AutoQuantizationConfig:
    """Collection of static factories that return pre-filled `QuantizationConfig` instances."""

    @staticmethod
    def ipu_cnn_config():
        """
        Build the preset labelled for CNN models on the IPU.

        Uses the QDQ format, `vai_q_onnx.PowerOfTwoMethod.MinMSE` calibration, unsigned 8-bit activations,
        signed 8-bit weights, symmetric quantization on both sides, and `enable_dpu=True`.

        Returns:
            `QuantizationConfig`: The populated configuration.
        """
        preset = {
            "format": QuantFormat.QDQ,
            "calibration_method": vai_q_onnx.PowerOfTwoMethod.MinMSE,
            "activations_dtype": QuantType.QUInt8,
            "activations_symmetric": True,
            "weights_dtype": QuantType.QInt8,
            "weights_symmetric": True,
            "enable_dpu": True,
        }
        return QuantizationConfig(**preset)

    @staticmethod
    def cpu_cnn_config(
        use_symmetric_activations: bool = False,
        use_symmetric_weights: bool = True,
        enable_dpu: bool = False,
    ):
        """
        Build the preset labelled for CNN models on the CPU.

        Uses the QDQ format, `vai_q_onnx.CalibrationMethod.MinMax` calibration, unsigned 8-bit activations and
        signed 8-bit weights; the symmetry and DPU options are taken from the arguments.

        Args:
            use_symmetric_activations (`bool`, defaults to `False`):
                Forwarded as `activations_symmetric`.
            use_symmetric_weights (`bool`, defaults to `True`):
                Forwarded as `weights_symmetric`.
            enable_dpu (`bool`, defaults to `False`):
                Forwarded as `enable_dpu`.

        Returns:
            `QuantizationConfig`: The populated configuration.
        """
        # Fixed part of the preset; the caller-tunable options are layered on top.
        preset = {
            "format": QuantFormat.QDQ,
            "calibration_method": vai_q_onnx.CalibrationMethod.MinMax,
            "activations_dtype": QuantType.QUInt8,
            "weights_dtype": QuantType.QInt8,
        }
        preset["activations_symmetric"] = use_symmetric_activations
        preset["weights_symmetric"] = use_symmetric_weights
        preset["enable_dpu"] = enable_dpu
        return QuantizationConfig(**preset)
class RyzenAIConfig(BaseConfig):
    """
    RyzenAIConfig is the configuration class handling all the VitisAI parameters related to the ONNX IR model export,
    and quantization parameters.

    Attributes:
        opset (`Optional[int]`, defaults to `None`):
            ONNX opset version to export the model with.
        quantization (`Optional[QuantizationConfig]`, defaults to `None`):
            Specify a configuration to quantize ONNX model. It is stored on the instance as a plain dictionary
            (see `dataclass_to_dict`).
    """

    CONFIG_NAME = "ryzenai_config.json"
    FULL_CONFIGURATION_FILE = "ryzenai_config.json"

    def __init__(
        self,
        opset: Optional[int] = None,
        quantization: Optional[QuantizationConfig] = None,
        **kwargs,
    ):
        """
        Store the export opset, the quantization settings as a dictionary, and the `optimum_version` entry
        of `kwargs` (`None` when absent).
        """
        super().__init__()
        self.opset = opset
        self.quantization = self.dataclass_to_dict(quantization)
        self.optimum_version = kwargs.pop("optimum_version", None)

    @staticmethod
    def dataclass_to_dict(config) -> dict:
        """
        Convert a dataclass instance into a dictionary whose enum values are replaced by their names.

        Args:
            config:
                `None`, a dictionary, or a dataclass instance.

        Returns:
            `dict`: An empty dictionary for `None`, the very same object for a dictionary input, otherwise the
            `asdict` view of `config` where enum members (top-level or directly inside a list) become their `name`.
        """
        if config is None:
            return {}
        if isinstance(config, dict):
            return config

        def _to_plain(value):
            # Enum members are replaced by their name, both standalone and inside lists.
            if isinstance(value, Enum):
                return value.name
            if isinstance(value, list):
                return [item.name if isinstance(item, Enum) else item for item in value]
            return value

        return {key: _to_plain(value) for key, value in asdict(config).items()}