optimum/commands/neural_compressor/quantize.py

# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
from pathlib import Path
from typing import TYPE_CHECKING, Optional

from ...exporters import TasksManager
from ..base import BaseOptimumCLICommand, CommandInfo


if TYPE_CHECKING:
    from argparse import ArgumentParser, Namespace, _SubParsersAction


def parse_args_inc_quantize(parser: "ArgumentParser"):
    required_group = parser.add_argument_group("Required arguments")
    required_group.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the repository where the model to quantize is located.",
    )
    required_group.add_argument(
        "-o",
        "--output",
        type=Path,
        required=True,
        help="Path to the directory where the generated quantized model will be stored.",
    )

    optional_group = parser.add_argument_group("Optional arguments")
    optional_group.add_argument(
        "--task",
        default="auto",
        help=(
            "The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among:"
            f" {str(TasksManager.get_all_tasks())}."
        ),
    )


class INCQuantizeCommand(BaseOptimumCLICommand):
    def __init__(
        self,
        subparsers: "_SubParsersAction",
        args: Optional["Namespace"] = None,
        command: Optional["CommandInfo"] = None,
        from_defaults_factory: bool = False,
        parser: Optional["ArgumentParser"] = None,
    ):
        super().__init__(
            subparsers, args=args, command=command, from_defaults_factory=from_defaults_factory, parser=parser
        )
        # Keep the raw arguments that follow the subcommand (sys.argv[3:], i.e.
        # everything after the program name and the two subcommand tokens).
        self.args_string = " ".join(sys.argv[3:])

    @staticmethod
    def parse_args(parser: "ArgumentParser"):
        return parse_args_inc_quantize(parser)

    def run(self):
        # Import lazily so that neural-compressor is only required when the command actually runs.
        from neural_compressor.config import PostTrainingQuantConfig

        from ...intel.neural_compressor import INCQuantizer

        save_dir = self.args.output
        model_id = self.args.model
        task = self.args.task

        if save_dir == model_id:
            raise ValueError("The output directory must be different from the directory hosting the model.")

        if task == "auto":
            try:
                task = TasksManager.infer_task_from_model(model_id)
            except Exception as e:
                return (
                    f"### Error: {e}. Please explicitly pass the task, as it could not be inferred.",
                    None,
                )

        model = TasksManager.get_model_from_task(task, model_id)
        # Apply post-training dynamic quantization and save the result.
        quantization_config = PostTrainingQuantConfig(approach="dynamic")
        quantizer = INCQuantizer.from_pretrained(model)
        quantizer.quantize(quantization_config=quantization_config, save_directory=save_dir)
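

# Usage sketch (assumptions hedged): given the `sys.argv[3:]` indexing above and the
# `neural_compressor` command directory, this command is presumably registered as
# `inc quantize` under `optimum-cli`, so a typical invocation would look like:
#
#   optimum-cli inc quantize --model <model_name_or_path> --output <output_dir>
#
# A minimal programmatic equivalent of `run()`, assuming the public module path
# `optimum.intel.neural_compressor` mirrors the relative import used in this file,
# and using a hypothetical example checkpoint:
#
#   from neural_compressor.config import PostTrainingQuantConfig
#   from optimum.intel.neural_compressor import INCQuantizer
#   from transformers import AutoModelForSequenceClassification
#
#   model = AutoModelForSequenceClassification.from_pretrained(
#       "distilbert-base-uncased-finetuned-sst-2-english"  # hypothetical model id
#   )
#   quantization_config = PostTrainingQuantConfig(approach="dynamic")
#   quantizer = INCQuantizer.from_pretrained(model)
#   quantizer.quantize(quantization_config=quantization_config, save_directory="./quantized_model")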