def run()

in optimum/onnxruntime/subpackage/commands/quantize.py


    def run(self):
        from ...configuration import AutoQuantizationConfig, ORTConfig
        from ...quantization import ORTQuantizer

        if self.args.output == self.args.onnx_model:
            raise ValueError("The output directory must be different from the directory hosting the ONNX model.")

        save_dir = self.args.output
        use_external_data_format = False

        # Build one quantizer per ONNX file found in the input directory.
        quantizers = [
            ORTQuantizer.from_pretrained(self.args.onnx_model, file_name=model.name)
            for model in self.args.onnx_model.glob("*.onnx")
        ]

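        # Pick a dynamic-quantization configuration for the requested target instruction set.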
        if self.args.arm64:
            qconfig = AutoQuantizationConfig.arm64(is_static=False, per_channel=self.args.per_channel)
        elif self.args.avx2:
            qconfig = AutoQuantizationConfig.avx2(is_static=False, per_channel=self.args.per_channel)
        elif self.args.avx512:
            qconfig = AutoQuantizationConfig.avx512(is_static=False, per_channel=self.args.per_channel)
        elif self.args.avx512_vnni:
            qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=self.args.per_channel)
        elif self.args.tensorrt:
            raise ValueError(
                "TensorRT quantization relies on static quantization, which requires calibration and is currently"
                " not supported through optimum-cli. Please adapt the Optimum static quantization examples to run"
                " static quantization for TensorRT: https://github.com/huggingface/optimum/tree/main/examples/onnxruntime/quantization"
            )
        else:
            # No instruction-set flag given: fall back to the quantization settings
            # serialized in the user-provided ORTConfig.
            config = ORTConfig.from_pretrained(self.args.config)
            qconfig = config.quantization
            use_external_data_format = config.use_external_data_format

        # Quantize each model and write the results to the output directory.
        for quantizer in quantizers:
            quantizer.quantize(
                save_dir=save_dir, quantization_config=qconfig, use_external_data_format=use_external_data_format
            )
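
For reference, here is a minimal, self-contained sketch of what this command effectively does for a single model, using the same public API (`ORTQuantizer`, `AutoQuantizationConfig`). The paths and file name are placeholders, and the AVX-512 branch is picked arbitrarily:

    from optimum.onnxruntime import ORTQuantizer
    from optimum.onnxruntime.configuration import AutoQuantizationConfig

    # Placeholder input: any directory containing a model.onnx file.
    quantizer = ORTQuantizer.from_pretrained("./onnx_model", file_name="model.onnx")

    # Dynamic (calibration-free) quantization targeting AVX-512,
    # matching the --avx512 branch above.
    qconfig = AutoQuantizationConfig.avx512(is_static=False, per_channel=False)

    # Write the quantized model to a separate output directory.
    quantizer.quantize(save_dir="./quantized_model", quantization_config=qconfig)

This is roughly what an invocation such as `optimum-cli onnxruntime quantize --avx512 --onnx_model ./onnx_model -o ./quantized_model` performs, except that the command quantizes every `*.onnx` file it finds in the input directory (flag names inferred from the `self.args` attributes above).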