optimum/intel/neural_compressor/utils.py (52 lines of code) (raw):

# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import warnings
from collections import UserDict

import torch
from neural_compressor.utils.pytorch import load
from torch.utils.data import DataLoader

from ..utils.constant import WEIGHTS_NAME


logger = logging.getLogger(__name__)


CONFIG_NAME = "best_configure.yaml"
QUANTIZATION_CONFIG_NAME = "quantize_config.json"
IPEX_MINIMUM_VERSION = "2.4.0"
NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0"
NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION = "2.3.0"

# Maps a task/head name to the name (as a string) of the matching INC model class.
_HEAD_TO_AUTOMODELS = {
    "fill-mask": "INCModelForMaskedLM",
    "text-generation": "INCModelForCausalLM",
    "text2text-generation": "INCModelForSeq2SeqLM",
    "text-classification": "INCModelForSequenceClassification",
    "token-classification": "INCModelForTokenClassification",
    "question-answering": "INCModelForQuestionAnswering",
    "multiple-choice": "INCModelForMultipleChoice",
    "stable-diffusion": "INCStableDiffusionPipeline",
    "feature-extraction": "INCModel",
}


class INCDataLoader(DataLoader):
    """
    `DataLoader` subclass whose iterator optionally pairs every batch with its labels.

    When `use_label` is `True`, iteration yields `(batch, label)` tuples; otherwise it yields the batch alone.
    """

    # Class-level default; `from_pytorch_dataloader` overrides it per instance.
    use_label = True

    @classmethod
    def from_pytorch_dataloader(cls, dataloader: DataLoader, use_label: bool = True) -> "INCDataLoader":
        """
        Builds an `INCDataLoader` that mirrors the state of an existing PyTorch `DataLoader`.

        Arguments:
            dataloader (`torch.utils.data.DataLoader`):
                The dataloader whose dataset and attributes are copied.
            use_label (`bool`, defaults to `True`):
                Whether iterating over the returned loader yields `(batch, label)` tuples instead of bare batches.

        Returns:
            `INCDataLoader`: A new loader sharing the attributes of `dataloader`.

        Raises:
            TypeError: If `dataloader` is not a `torch.utils.data.DataLoader`.
        """
        if not isinstance(dataloader, DataLoader):
            raise TypeError(f"Expected a PyTorch DataLoader, got: {type(dataloader)}.")

        inc_dataloader = cls(dataloader.dataset)
        # Write straight into `__dict__` so every attribute of the source loader is copied verbatim.
        for key, value in dataloader.__dict__.items():
            inc_dataloader.__dict__[key] = value
        # Keep the flag on the instance rather than on the class: assigning `cls.use_label` would silently
        # change the behavior of every other `INCDataLoader`. It is set after the copy so that the explicit
        # argument always takes precedence over a value carried by the source loader.
        inc_dataloader.use_label = use_label
        return inc_dataloader

    def __iter__(self):
        """
        Iterates over the batches of the underlying `DataLoader`.

        Yields:
            `(batch, label)` when `self.use_label` is `True`, where `label` is `batch.get("labels")` for `dict`
            batches and `None` for any other batch type; otherwise the bare `batch`.

        Raises:
            TypeError: If a batch is not a `dict`, `tuple`, `list` or `UserDict`.
        """
        for batch in super().__iter__():
            if not isinstance(batch, (dict, tuple, list, UserDict)):
                raise TypeError(f"Model calibration cannot use input of type {type(batch)}.")
            # NOTE(review): `UserDict` batches are accepted above but are not queried for "labels" here,
            # so their label is always `None` - confirm whether this is intended.
            label = batch.get("labels") if isinstance(batch, dict) else None
            if self.use_label:
                yield batch, label
            else:
                yield batch


def load_quantized_model(checkpoint_dir_or_file: str, model: torch.nn.Module, **kwargs) -> torch.nn.Module:
    """
    Returns the quantized model, which was quantized through neural_compressor.

    Arguments:
        checkpoint_dir_or_file (`str`):
            The path to the model checkpoint containing the quantization information. When it is a directory,
            the file named `WEIGHTS_NAME` inside it is loaded.
        model (`torch.nn.Module`):
            The original FP32 model.
        kwargs:
            Additional keyword arguments forwarded to `neural_compressor.utils.pytorch.load`.

    Returns:
        `torch.nn.Module`: The value returned by `neural_compressor.utils.pytorch.load`.
    """
    # `stacklevel=2` attributes the warning to the caller of this function.
    warnings.warn(
        "This function has been deprecated and will be removed in optimum-intel v1.9.",
        stacklevel=2,
    )
    if os.path.isdir(checkpoint_dir_or_file):
        checkpoint_dir_or_file = os.path.join(
            os.path.abspath(os.path.expanduser(checkpoint_dir_or_file)), WEIGHTS_NAME
        )
    return load(checkpoint_dir_or_file, model, **kwargs)