def maybe_offload_weights_to_cpu()

in src/optimum/nvidia/utils/offload.py [0:0]


def maybe_offload_weights_to_cpu(model: Module):
    """Attach CPU-offload hooks to every entry of the device map placed on "cpu".

    The function reads ``model.hf_device_map`` (a mapping of submodule name
    to device) when the attribute exists. Nothing is done when the attribute
    is missing, when no entry is mapped to ``"cpu"``, or when the map has a
    single entry.

    Args:
        model: The model to inspect; it is modified in place through the
            hook helpers and also returned.

    Returns:
        The same ``model`` object that was passed in.

    Raises:
        ValueError: If any entry of the device map is placed on ``"disk"``.
    """
    if not hasattr(model, "hf_device_map"):
        return model

    placement = model.hf_device_map
    targets = set(placement.values())

    if "disk" in targets:
        raise ValueError("disk offload is not supported with quantization")

    # Offloading is only set up for a mixed map: at least one "cpu" entry
    # and more than one entry overall.
    if "cpu" not in targets or len(placement) <= 1:
        return model

    # Each call receives the hook produced by the previous one, so the hooks
    # are linked in device-map iteration order.
    previous_hook = None
    for name, device in placement.items():
        if device != "cpu":
            continue

        LOGGER.debug(f"Offloading {name} to device {device}")
        submodule = recurse_getattr(model, name)
        # Drop any hooks already attached before installing the offload hook.
        remove_hook_from_module(submodule, recurse=True)
        _, previous_hook = cpu_offload_with_hook(
            submodule, prev_module_hook=previous_hook
        )

    return model