in optimum/intel/openvino/configuration.py [0:0]
def post_init(self):
r"""
Safety checker verifying that the provided arguments are correct.
"""
super().post_init()
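# `ratio` is the fraction of weights compressed to the primary precision; the rest
# keeps the backup precision. A `group_size` of -1 selects per-channel grouping
# (one set of quantization parameters per output channel) in NNCF.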
if not (0 <= self.ratio <= 1):
raise ValueError("`ratio` must between 0 and 1.")
if self.group_size is not None and self.group_size != -1 and self.group_size <= 0:
raise ValueError("`group_size` must be greater than 0 or equal to -1")
if not (self.dataset is None or isinstance(self.dataset, (str, list))):
raise ValueError(
f"Dataset must be a instance of either string or list of strings, but found {type(self.dataset)}. "
f"If you wish to provide a custom dataset, please use the `OVQuantizer` instead."
)
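# When passed as a string, the dataset must name one of the predefined calibration
# sets (e.g. "wikitext2" for causal LMs; the exact choices come from the
# PREDEFINED_* constants referenced below).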
if self.dataset is not None and isinstance(self.dataset, str):
visual_lm_datasets = set(PREDEFINED_VISUAL_LM_DATASETS.keys())
stable_diffusion_datasets = set(PREDEFINED_SD_DATASETS.keys())
language_datasets = set(PREDEFINED_LANGUAGE_DATASETS.keys())
if (
self.dataset
not in PREDEFINED_CAUSAL_LANGUAGE_DATASETS
| language_datasets
| visual_lm_datasets
| stable_diffusion_datasets
):
raise ValueError(
"You have entered a string value for dataset. You can only choose between "
f"{language_datasets} for text feature extraction models, "
f"{PREDEFINED_CAUSAL_LANGUAGE_DATASETS} for LLMs, {visual_lm_datasets} for visual LLMs or "
f"{stable_diffusion_datasets} for diffusion models, but we found {self.dataset}."
)
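# Only data-aware algorithms consume the dataset: AWQ, hybrid quantization, scale
# estimation, GPTQ, LoRA correction, or mixed-precision selection driven by a
# data-based sensitivity metric. Warn if none of these is active.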
if self.dataset is not None and not (
self.quant_method in [OVQuantizationMethod.AWQ, OVQuantizationMethod.HYBRID]
or self.scale_estimation
or self.gptq
or self.lora_correction
or (self.ratio < 1.0 and self.sensitivity_metric != nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR)
):
logger.warning(
"The provided dataset won't have any effect on the resulting compressed model because no data-aware "
"quantization algorithm is selected and compression ratio is 1.0."
)
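# Data-free AWQ (AWQ without a calibration dataset) requires NNCF >= 2.17.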
if self.dataset is None and self.quant_method == OVQuantizationMethod.AWQ and is_nncf_version("<", "2.17.0"):
raise ValueError("Data-free AWQ is available starting form NNCF 2.17. Please update nncf package.")
if self.dtype in ["int4", "int8"]:
bits = 4 if self.dtype == "int4" else 8
if self.bits is not None and self.bits != bits:
logger.warning(
f"Overriding `bits` parameter to the value `bits`={bits} to match the given {self.dtype} `dtype`."
)
self.bits = bits
if self.bits not in [4, 8]:
raise ValueError(f"Only support quantization to [4,8] bits but found {self.bits}")
if self.bits == 8 and self.dtype:
if self.ratio != 1:
raise ValueError(
f"For 8-bit quantization, `ratio` is expected to be set to 1.0, but was set to {self.ratio}"
)
if self.group_size != -1:
raise ValueError(
f"For 8-bit quantization, `group_size` is expected to be set to -1, but was set to {self.group_size}"
)
if self.all_layers:
raise ValueError("The `all_layers` parameter is not supported for 8-bit quantization")
if self.sensitivity_metric:
raise ValueError("The `sensitivity_metric` parameter is not supported for 8-bit quantization")
if self.quant_method == OVQuantizationMethod.AWQ:
raise ValueError(
"The AWQ algorithm is not supported for 8-bit quantization and got `quant_method='awq'`, please update accordingly"
)
if self.scale_estimation:
raise ValueError(
"The Scale Estimation algorithm is not supported for 8-bit quantization and got `scale_estimation=True`, please set `scale_estimation=False`"
)
if self.gptq:
raise ValueError(
"The GPTQ algorithm is not supported for 8-bit quantization and got `gptq=True`, please set `gptq=False`"
)
if self.lora_correction:
raise ValueError(
"The LoRA Correction algorithm is not supported for 8-bit quantization and got `lora_correction=True`, please set `lora_correction=False`"
)
if self.backup_precision is not None:
raise ValueError(
f"The `backup_precision` parameter is not supported for 8-bit quantization and got "
f"`backup_precision={self.backup_precision}`, please set `backup_precision=None`"
)
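# `backup_precision` controls how the layers excluded from the primary 4-bit
# precision (e.g. by `ratio`) are quantized.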
if self.backup_precision is not None and self.backup_precision not in ["none", "int8_sym", "int8_asym"]:
raise ValueError(
f"`backup_precision` parameter must be on of the following: ['none', 'int8_sym', 'int8_asym'], but found{self.backup_precision}"
)
if self.tokenizer is not None and not isinstance(self.tokenizer, str):
raise ValueError(f"Tokenizer is expected to be a string, but found {self.tokenizer}")
if self.processor is not None and not isinstance(self.processor, str):
raise ValueError(f"Processor is expected to be a string, but found {self.processor}")
if self.dtype is None:
self.dtype = "int4" if self.bits == 4 else "int8"
if self.dtype not in ["int4", "int8", "mxfp4", "nf4"]:
raise ValueError(
f"Weights quantization data type must be one of the following: ['int4', 'int8', 'mxfp4', 'nf4'], but found: {self.dtype}."
)
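# "mxfp4" (microscaling FP4) and "nf4" (NormalFloat4) are 4-bit data types; none of
# the data-aware algorithms are currently supported with "mxfp4".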
if self.dtype in ["mxfp4", "nf4"]:
if self.bits != 4:
raise ValueError(
f"When applying weight compression with '{self.dtype}' data type, the `bits` parameter must be set to 4, but found {self.bits}"
)
if self.dtype == "mxfp4":
if self.quant_method == OVQuantizationMethod.AWQ:
raise ValueError("The AWQ algorithm is not supported for 'mxpf4' data type")
if self.scale_estimation:
raise ValueError("The Scale Estimation algorithm is not supported for 'mxpf4' data type")
if self.gptq:
raise ValueError("The GPTQ algorithm is not supported for 'mxfp4' data type")
if self.lora_correction:
raise ValueError("The LoRA Correction algorithm is not supported for 'mxfp4' data type")
if self.gptq and self.lora_correction:
raise ValueError("The GPTQ and LoRA Correction algorithms can't be applied simultaneously")