in optimum/habana/quantizers/bitsandbytes.py [0:0]
# Context (assumption about the module's imports, shown here so the snippet is
# self-contained): these helpers come from transformers' import utilities; the
# gaudi_* variants are defined alongside this function in the same module.
from transformers.utils import (
    ACCELERATE_MIN_VERSION,
    is_accelerate_available,
    is_bitsandbytes_multi_backend_available,
)


def gaudi_validate_environment(self, *args, **kwargs):
    """
    Copied from https://github.com/huggingface/transformers/blob/5523e38b553ff6c46b04d2376870fcd842feeecc/src/transformers/quantizers/quantizer_bnb_4bit.py#L68
    The only difference is the removal of the bitsandbytes version checks.
    """
    if not is_accelerate_available():
        raise ImportError(
            f"Using `bitsandbytes` 4-bit quantization requires Accelerate: `pip install 'accelerate>={ACCELERATE_MIN_VERSION}'`"
        )
    if not gaudi_is_bitsandbytes_available():
        raise ImportError(
            "Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`"
        )

    bnb_multibackend_is_enabled = is_bitsandbytes_multi_backend_available()
    gaudi_validate_bnb_backend_availability(raise_exception=True)

    if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
        raise ValueError(
            "Converting into 4-bit or 8-bit weights from tf/flax weights is currently not supported, please make"
            " sure the weights are in PyTorch format."
        )

    device_map = kwargs.get("device_map", None)
    if (
        device_map is not None
        and isinstance(device_map, dict)
        and not self.quantization_config.llm_int8_enable_fp32_cpu_offload
    ):
        # Ignore modules that are deliberately kept unquantized (e.g. the LM
        # head) when checking where the quantized modules are dispatched.
        device_map_without_lm_head = {
            key: device_map[key] for key in device_map.keys() if key not in self.modules_to_not_convert
        }
        # A pure-CPU device_map is allowed when the multi-backend build of
        # bitsandbytes is available; partial CPU/disk offload of quantized
        # modules is not.
        if set(device_map.values()) == {"cpu"} and bnb_multibackend_is_enabled:
            pass
        elif "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
            raise ValueError(
                "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
                "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
                "in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to "
                "`from_pretrained`. Check "
                "https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu "
                "for more details. "
            )
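
For context, validate_environment() runs inside from_pretrained() before any weights are materialized, so a missing accelerate or bitsandbytes install fails fast with the errors above. A minimal sketch of the calling path, assuming optimum-habana patches this method onto transformers' 4-bit quantizer and that an HPU-enabled bitsandbytes build is installed (the checkpoint id is a placeholder):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit NF4 quantization; bf16 compute matches Gaudi's native dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# from_pretrained() calls the quantizer's validate_environment() with the
# kwargs it received, which is how from_tf/from_flax and device_map reach
# the checks above.
model = AutoModelForCausalLM.from_pretrained(
    "my-org/my-4bit-model",  # placeholder checkpoint id
    quantization_config=quantization_config,
)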
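
The device_map branch is the check users most often trip over: a dict device_map that dispatches any quantized module to "cpu" or "disk" is rejected unless fp32 CPU offload is enabled on the config. A sketch of the opt-in path (the module names and device index are placeholders, not the actual contents of modules_to_not_convert):

from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Enabling fp32 CPU offload makes the guard above skip the ValueError,
# keeping the offloaded modules in 32-bit instead of quantizing them.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)
device_map = {
    "model": 0,        # quantized layers stay on the accelerator
    "lm_head": "cpu",  # placeholder: a module kept in fp32 on the CPU
}
model = AutoModelForCausalLM.from_pretrained(
    "my-org/my-4bit-model",  # placeholder checkpoint id
    quantization_config=quantization_config,
    device_map=device_map,
)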