in optimum/habana/transformers/integrations/awq.py
import importlib.metadata

from packaging import version
from transformers.utils import is_auto_awq_available
from transformers.utils.quantization_config import AwqBackendPackingMethod

# GaudiAWQLinearVersion is defined elsewhere in this module.

def gaudi_awq_config_post_init(self):
"""
Adapted from: https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/utils/quantization_config.py#L818
- support HPU.
"""
    if self.backend not in [AwqBackendPackingMethod.AUTOAWQ]:
        raise ValueError(
            f"Only the {AwqBackendPackingMethod.AUTOAWQ} quantization backend is supported - received unrecognized backend {self.backend}"
        )
self.version = GaudiAWQLinearVersion.from_str(self.version)
if self.version not in [
GaudiAWQLinearVersion.HPU,
GaudiAWQLinearVersion.GEMM,
]:
        raise ValueError(
            f"Only the GaudiAWQLinearVersion.HPU and GaudiAWQLinearVersion.GEMM versions are supported - received unrecognized version {self.version}"
        )
if self.do_fuse and self.fuse_max_seq_len is None:
        raise ValueError(
            "You cannot enable fused modules without specifying a `fuse_max_seq_len`; make sure to pass a valid `fuse_max_seq_len` for your use case."
        )
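    # Module fusing was introduced in autoawq 0.1.7; gate on the installed version.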
if self.do_fuse:
awq_version_supports_fusing = False
MIN_AWQ_VERSION = "0.1.7"
if is_auto_awq_available():
awq_version_supports_fusing = version.parse(importlib.metadata.version("autoawq")) >= version.parse(
MIN_AWQ_VERSION
)
if not awq_version_supports_fusing:
            raise ValueError(
                f"Your current version of `autoawq` does not support module fusing; please upgrade the `autoawq` package to at least {MIN_AWQ_VERSION}."
            )
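    # Skipping quantization for selected modules requires autoawq >= 0.1.8.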
if self.modules_to_not_convert is not None:
awq_version_supports_non_conversion = False
MIN_AWQ_VERSION = "0.1.8"
if is_auto_awq_available():
awq_version_supports_non_conversion = version.parse(
importlib.metadata.version("autoawq")
) >= version.parse(MIN_AWQ_VERSION)
if not awq_version_supports_non_conversion:
            raise ValueError(
                f"Your current version of `autoawq` does not support skipping module quantization; please upgrade the `autoawq` package to at least {MIN_AWQ_VERSION}."
            )
    if self.do_fuse and self.modules_to_fuse is not None:
        raise ValueError(
            "The current implementation of `autoawq` does not support enabling `do_fuse` together with a custom `modules_to_fuse` mapping."
        )