#  Copyright 2022 The HuggingFace Team. All rights reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
"""Defines the base classes that are used to perform inference with ONNX Runtime sessions."""

import os
import shutil
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple, Union

import numpy as np
import torch

from onnxruntime import InferenceSession, IOBinding
from onnxruntime.transformers.io_binding_helper import TypeHelper

from ..onnx.utils import _get_model_external_data_paths
from ..utils.logging import get_logger
from .utils import (
    get_device_for_provider,
    get_dtype_from_session,
    get_provider_for_device,
    parse_device,
    validate_provider_availability,
)


logger = get_logger(__name__)

# Tensor whose data pointer is used in place of zero-sized inputs when binding them (see `_prepare_io_binding`).
NON_EMPTY_TENSOR = torch.tensor(0)


class ORTSessionMixin:
    """
    Mixin class that provides common functionalities for an ONNX Runtime session.
    This class is used to manage the session, the execution provider, and the IO binding.
    It also provides methods to prepare the inputs and outputs for ONNX Runtime.
    """

    def initialize_ort_attributes(self, session: InferenceSession, use_io_binding: Optional[bool] = None):
        """
        Initializes the ONNX Runtime attributes of the mixin.
        Args:
            session (`onnxruntime.InferenceSession`):
                The ONNX Runtime session to use for inference.
            use_io_binding (`Optional[bool]`, defaults to `None`):
                Whether to use IO Binding or not. If `None`, it will be set to `True` for CUDAExecutionProvider and `False`
                for other providers.
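
        Example (a minimal sketch; assumes an exported `model.onnx` on disk and a hypothetical
        `MyORTModel` subclass of this mixin):

        ```python
        >>> from onnxruntime import InferenceSession
        >>> session = InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
        >>> model = MyORTModel()
        >>> model.initialize_ort_attributes(session, use_io_binding=False)
        ```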
        """

        self.session = session
        self.path = Path(session._model_path)

        if use_io_binding is None:
            if self.provider == "CUDAExecutionProvider":
                logger.info(
                    "`use_io_binding` was not set, but CUDAExecutionProvider supports IO Binding. "
                    "Setting `use_io_binding=True` to leverage IO Binding and improve performance. "
                    "You can disable it by setting `model.use_io_binding=False`."
                )
                use_io_binding = True
            else:
                use_io_binding = False

        self._use_io_binding = use_io_binding
        self._io_binding = IOBinding(session)
        self._dtype = get_dtype_from_session(session)
        self._device = get_device_for_provider(self.provider, self.provider_option)

        self.input_names = {input.name: idx for idx, input in enumerate(session.get_inputs())}
        self.output_names = {output.name: idx for idx, output in enumerate(session.get_outputs())}
        self.input_shapes = {input.name: input.shape for input in session.get_inputs()}
        self.output_shapes = {output.name: output.shape for output in session.get_outputs()}
        self.input_dtypes = {input.name: input.type for input in session.get_inputs()}
        self.output_dtypes = {output.name: output.type for output in session.get_outputs()}

    @property
    def model_path(self) -> Path:
        """
        Returns the path of the ONNX file from which the session was created.
        """
        logger.warning(
            "The `ORTSessionMixin.model_path` property is deprecated and will be removed in a future version. "
            "Please use `ORTSessionMixin.path` instead (`ORTSessionMixin.path` is a proper Path object)."
        )
        return self.path

    @property
    def model_name(self) -> str:
        """
        Returns the name of the ONNX file from which the session was created.
        """
        logger.warning(
            "The `ORTSessionMixin.model_name` property is deprecated and will be removed in a future version. "
            "Please use `ORTSessionMixin.path.name` instead (`ORTSessionMixin.path` is a proper Path object)."
        )
        return self.path.name

    @property
    def providers(self) -> List[str]:
        """
        Returns a list of Execution Providers registered with the session.
        """
        return self.session.get_providers()

    @property
    def provider(self) -> str:
        """
        Returns the main Execution Provider registered with the session.
        """
        return self.providers[0]

    @property
    def provider_options(self) -> Dict[str, Any]:
        """
        Returns a dictionary of Execution Providers configurations/options.
        """
        return self.session.get_provider_options()

    @property
    def provider_option(self) -> Dict[str, Any]:
        """
        Returns the configuration/options of the main Execution Provider.
        """
        return self.provider_options[self.provider]

    @property
    def device(self) -> torch.device:
        """
        Returns the `torch.device` associated with the ONNX Runtime session.
        This device is inferred from the provider and provider options.
        """
        return self._device

    @device.setter
    def device(self, *args, **kwargs):
        raise AttributeError(
            "The device attribute is read-only, please use the `.to(device)` "
            "method to change both the device and the execution provider accordingly."
        )

    @property
    def dtype(self) -> torch.dtype:
        """
        Returns the `torch.dtype` associated with the ONNX Runtime session.
        This dtype is inferred from the input/output dtypes of the session.
        If no floating point type is found, it defaults to `torch.float32`.
        """
        return self._dtype

    @property
    def use_io_binding(self) -> Optional[bool]:
        """
        Returns whether IO Binding is used or not.
        """
        return self._use_io_binding

    @use_io_binding.setter
    def use_io_binding(self, value: bool):
        """
        Sets the IO Binding usage.
        """
        if not isinstance(value, bool):
            raise ValueError("`use_io_binding` should be a boolean value.")

        self._use_io_binding = value

    def to(self, *args, **kwargs):
        """
        Moves the session to the specified device by updating the execution provider and its options.
        Args:
            device (`str`, `int`, `torch.device`):
                The device to move the session to. It can be a string (e.g., "cuda", "cpu"), an integer (e.g., 0 for GPU 0),
                or a `torch.device` object.
        Returns:
            `ORTSessionMixin`: The updated session.
        Raises:
            ValueError: If the device is not supported or if the provider is not available.
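
        Example (illustrative; moving to CUDA requires `onnxruntime-gpu` and an available GPU):

        ```python
        >>> model = model.to("cuda:0")  # switches to CUDAExecutionProvider on GPU 0
        >>> model = model.to("cpu")  # switches back to CPUExecutionProvider
        ```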
        """

        dtype = None
        device = None

        for arg in args:
            if isinstance(arg, (str, torch.device)):
                device = arg
            elif isinstance(arg, int):
                device = torch.device(arg)
            elif isinstance(arg, torch.dtype):
                dtype = arg

        for key, value in kwargs.items():
            if key == "device":
                device = value
            elif key == "dtype":
                dtype = value

        if dtype is not None:
            # we don't support changing the dtype of the model
            return self

        if device is None:
            # no device was provided, we don't change the device
            return self

        device, provider_option = parse_device(device)
        provider = get_provider_for_device(device)
        validate_provider_availability(provider)

        if device == self.device:
            return self

        self.session.set_providers([provider], provider_options=[provider_option])

        if self.use_io_binding is None:
            if self.provider == "CUDAExecutionProvider":
                logger.info(
                    "`use_io_binding` was set to `None` before the provider was changed to CUDAExecutionProvider. "
                    "Setting `use_io_binding=True` to leverage IO Binding and improve performance. "
                    "You can disable it by setting `model.use_io_binding=False`."
                )
                self.use_io_binding = True

        self._device = device

        return self

    def raise_on_numpy_input_io_binding(self, use_torch: bool):
        """
        Raises an error if IO Binding is requested while the tensors passed are numpy arrays.

        Args:
            use_torch (`bool`):
                Whether the tensors used during inference are `torch.Tensor` or not.
        """
        if use_torch is False and self.use_io_binding is True:
            raise ValueError(
                "IO Binding can not be used when passing numpy inputs. Please disable IO Binding"
                " with `model.use_io_binding=False`, or pass `torch.Tensor` inputs instead."
            )

    def _prepare_onnx_inputs(
        self, use_torch: bool, model_inputs: Dict[str, Union[torch.Tensor, np.ndarray]]
    ) -> Dict[str, np.ndarray]:
        """
        Prepares the inputs for ONNX Runtime by converting them to numpy arrays with the expected dtype.

        Args:
            use_torch (`bool`):
                Whether the inputs are torch.Tensor or not.
            model_inputs (`Dict[str, Union[torch.Tensor, np.ndarray]]`):
                The inputs to prepare for ONNX Runtime.

        Returns:
            `Dict[str, np.ndarray]`: The inputs prepared for ONNX Runtime.
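
        Example (an illustrative flow inside a subclass `forward`, with a hypothetical `input_ids` input):

        ```python
        >>> use_torch = isinstance(model_inputs["input_ids"], torch.Tensor)
        >>> onnx_inputs = self._prepare_onnx_inputs(use_torch, model_inputs)
        >>> onnx_outputs = self.session.run(None, onnx_inputs)
        >>> model_outputs = self._prepare_onnx_outputs(use_torch, onnx_outputs)
        ```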
        """

        onnx_inputs = {}

        for input_name in self.input_names.keys():
            if model_inputs.get(input_name, None) is None:
                raise ValueError(f"Input {input_name} is required by the model but was not provided.")

            if use_torch:
                onnx_inputs[input_name] = model_inputs[input_name].numpy(force=True)
            else:
                onnx_inputs[input_name] = model_inputs[input_name]

            expected_dtype = TypeHelper.ort_type_to_numpy_type(self.input_dtypes[input_name])

            if onnx_inputs[input_name].dtype != expected_dtype:
                onnx_inputs[input_name] = onnx_inputs[input_name].astype(expected_dtype)

        return onnx_inputs

    def _prepare_onnx_outputs(
        self, use_torch: bool, onnx_outputs: List[np.ndarray]
    ) -> Dict[str, Union[torch.Tensor, np.ndarray]]:
        """
        Prepares the outputs from ONNX Runtime by converting them to torch.Tensor if requested.

        Args:
            use_torch (`bool`):
                Whether the outputs should be torch.Tensor or not.
            onnx_outputs (`List[np.ndarray]`):
                The outputs from ONNX Runtime.

        Returns:
            `Dict[str, Union[torch.Tensor, np.ndarray]]`: The outputs prepared for the user.
        """

        model_outputs = {}

        for output_name, idx in self.output_names.items():
            model_outputs[output_name] = onnx_outputs[idx]

            if use_torch:
                model_outputs[output_name] = torch.from_numpy(model_outputs[output_name]).to(self.device)

        return model_outputs

    def _prepare_output_buffer(self, output_name: str, output_shape: Tuple[int]) -> torch.Tensor:
        """
        Prepares an output buffer for ONNX Runtime IO Binding.

        Args:
            output_name (`str`):
                The name of the output for which to prepare the buffer.
            output_shape (`Tuple[int]`):
                The shape of the output buffer.

        Returns:
            `torch.Tensor`: The output buffer.
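
        Example (illustrative; assumes a `"logits"` output of shape `(batch_size, sequence_length, vocab_size)`):

        ```python
        >>> buffer = self._prepare_output_buffer("logits", (2, 8, 32000))
        >>> buffer.numel()  # a flat buffer; reshaped to the output shape by the caller
        512000
        ```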

        """
        if len(output_shape) == 0:
            raise ValueError("`output_shape` should not be empty")
        elif not all(isinstance(dim, int) for dim in output_shape):
            raise ValueError(f"`output_shape` should only contain integers but got {output_shape}.")
        elif not all(dim > 0 for dim in output_shape):
            raise ValueError(f"`output_shape` should only contain positive integers but got {output_shape}.")

        output_dtype = TypeHelper.ort_type_to_torch_type(self.output_dtypes[output_name])

        # `output_shape` is guaranteed non-empty and positive at this point, so we allocate a flat
        # buffer with the right number of elements; it is reshaped to `output_shape` by the caller.
        output_buffer = torch.empty(np.prod(output_shape), dtype=output_dtype, device=self.device)

        return output_buffer

    def _output_shape_inference(self, output_name: str, known_axes_values: Dict[str, int]) -> List[int]:
        """
        Infers the shape of a given output by using the `known_axes_values` mapping.

        Args:
            output_name (`str`):
                The name of the output for which to infer the shape.
            known_axes_values (`Dict[str, int]`):
                A mapping of the axis names to their values.

        Returns:
            `List[int]`: The inferred shape of the output.
        """

        output_shape = list(self.output_shapes[output_name])

        for idx, axis_name in enumerate(output_shape):
            if isinstance(axis_name, str):
                output_shape[idx] = self._dynamic_axis_inference(axis_name, known_axes_values)

        return output_shape

    def _dynamic_axis_inference(self, axis_name: str, known_axes_values: Dict[str, int]) -> int:
        """
        Infers the value of a given dynamic axis by using the `known_axes_values` mapping.

        For instance, for the following inputs:
            axis_name = "sequence_length + past_sequence_length"
            known_axes_values = {"batch_size": 2, "sequence_length": 3, "past_sequence_length": 7}

        The inferred value will be:
            3 + 7 = 10
        """

        if axis_name in known_axes_values:
            # simple case, the axis value is known
            return known_axes_values[axis_name]

        # composite axis (e.g. "sequence_length + past_sequence_length"): substitute the known
        # dimension values token by token, then evaluate the remaining arithmetic expression
        tokens = axis_name.split(" ")
        for idx, token in enumerate(tokens):
            if token in known_axes_values:
                tokens[idx] = str(known_axes_values[token])

        return int(eval(" ".join(tokens)))

    def _prepare_io_binding(
        self,
        model_inputs: Dict[str, torch.Tensor],
        outputs_to_not_bind: Optional[Set[str]] = None,
        known_output_buffers: Optional[Dict[str, torch.Tensor]] = None,
        known_output_shapes: Optional[Dict[str, Tuple[int]]] = None,
    ) -> Tuple[Dict[str, Tuple[int]], Dict[str, torch.Tensor]]:
        """
        Prepares IO binding for ONNX Runtime.

        Args:
            model_inputs (`Dict[str, torch.Tensor]`):
                The inputs to bind to the model.
            outputs_to_not_bind (`Optional[Set[str]]`, defaults to `None`):
                The names of the outputs that should not be bound.
            known_output_buffers (`Optional[Dict[str, torch.Tensor]]`, defaults to `None`):
                Sometimes the same buffer can be reused for an input and an output. This is the case for the
                output sample in a diffusion pipeline. It is possible to explicitly pass the buffer via this argument.
            known_output_shapes (`Optional[Dict[str, Tuple[int]]]`, defaults to `None`):
                It can be hard to infer all the output shapes from the inputs alone, for instance for the
                past key/values. It is possible to explicitly pass the shapes via this argument.

        Returns:
            `Tuple[Dict[str, Tuple[int]], Dict[str, torch.Tensor]]`: A tuple of a dictionary mapping output names
            to their shapes and a dictionary mapping output names to their buffers.
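
        Example (a sketch of the IO Binding flow; `"logits"` is an assumed output name):

        ```python
        >>> output_shapes, output_buffers = self._prepare_io_binding(model_inputs)
        >>> self._io_binding.synchronize_inputs()
        >>> self.session.run_with_iobinding(self._io_binding)
        >>> self._io_binding.synchronize_outputs()
        >>> logits = output_buffers["logits"].view(output_shapes["logits"])
        ```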
        """

        known_axes_values = {}

        for input_name in self.input_names.keys():
            input_shape = model_inputs[input_name].shape

            if not model_inputs[input_name].is_contiguous():
                model_inputs[input_name] = model_inputs[input_name].contiguous()

            tensor_dtype = model_inputs[input_name].dtype
            expected_dtype = TypeHelper.ort_type_to_torch_type(self.input_dtypes[input_name])
            if tensor_dtype != expected_dtype:
                model_inputs[input_name] = model_inputs[input_name].to(expected_dtype)

            data_ptr = model_inputs[input_name].data_ptr()
            if data_ptr == 0:
                # During the first generation step, sequence_length can be 0 when use_cache=True, which makes
                # data_ptr 0 as well. To keep compatibility with IO Binding, we pass the data pointer of a
                # non-empty tensor instead. This has no impact because past_key_values are not used during
                # the first generation step.
                data_ptr = NON_EMPTY_TENSOR.data_ptr()

            self._io_binding.bind_input(
                input_name,
                self.device.type,
                self.device.index or 0,
                TypeHelper.ort_type_to_numpy_type(self.input_dtypes[input_name]),
                input_shape,
                data_ptr,
            )

            for idx, axis_name in enumerate(self.input_shapes[input_name]):
                if isinstance(axis_name, str):
                    known_axes_values[axis_name] = input_shape[idx]

        output_shapes = {}
        output_buffers = {}
        known_output_shapes = known_output_shapes or {}
        known_output_buffers = known_output_buffers or {}
        outputs_to_not_bind = outputs_to_not_bind or set()

        for output_name in self.output_names.keys():
            if output_name in outputs_to_not_bind:
                continue

            if output_name in known_output_shapes:
                output_shape = known_output_shapes[output_name]
            else:
                output_shape = self._output_shape_inference(output_name, known_axes_values)

            if output_name in known_output_buffers:
                output_buffer = known_output_buffers[output_name]
            else:
                output_buffer = self._prepare_output_buffer(output_name, output_shape)

            data_ptr = output_buffer.data_ptr()

            self._io_binding.bind_output(
                output_name,
                self.device.type,
                self.device.index or 0,
                TypeHelper.ort_type_to_numpy_type(self.output_dtypes[output_name]),
                output_shape,
                data_ptr,
            )

            output_buffers[output_name] = output_buffer
            output_shapes[output_name] = output_shape

        return output_shapes, output_buffers

    def forward(self, *args, **kwargs):
        raise NotImplementedError(
            "The `forward` method should be implemented in the derived class. "
            "Please refer to the documentation for more details."
        )

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def save_session(self, save_directory: Union[str, Path]):
        """
        Saves the ONNX Runtime session to the specified directory.

        Args:
            save_directory (`Union[str, Path]`):
                The directory where to save the ONNX Runtime session.
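
        Example (illustrative; `./saved_model` is a placeholder directory):

        ```python
        >>> model.save_session("./saved_model")  # copies the .onnx file and any external data files
        ```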
        """

        os.makedirs(save_directory, exist_ok=True)

        model_path = Path(self.session._model_path)
        model_save_path = Path(save_directory) / model_path.name
        external_data_paths = _get_model_external_data_paths(model_path)
        external_data_save_paths = [
            Path(save_directory) / external_data_path.name for external_data_path in external_data_paths
        ]

        shutil.copy(model_path, model_save_path)
        for src_path, dst_path in zip(external_data_paths, external_data_save_paths):
            shutil.copy(src_path, dst_path)


class ORTParentMixin:
    """
    Wrapper class for multiple ORTSessionMixin instances. This class allows to combine multiple parts into
    a single wrapper. It is useful for pipelines/models that require multiple parts to work together, such
    as diffusion pipelines or encoder-decoder models, as it provides a unified interface for inference.
    """

    def initialize_ort_attributes(self, parts: List[ORTSessionMixin]):
        """
        Initializes the ONNX Runtime attributes of the wrapper.
        Args:
            parts (`List[ORTSessionMixin]`):
                List of ORTSessionMixin instances to wrap.
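
        Example (a sketch with hypothetical `encoder` and `decoder` parts):

        ```python
        >>> self.initialize_ort_attributes(parts=[self.encoder, self.decoder])
        >>> self.to("cuda")  # moves every part to CUDAExecutionProvider
        ```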
        """

        if len(parts) < 1:
            raise ValueError("ORTParentMixin should be initialized with at least one part.")

        if any(not isinstance(model, ORTSessionMixin) for model in parts):
            raise ValueError("All parts passed to ORTParentMixin should be ORTSessionMixin instances.")

        self.parts = parts

    @property
    def providers(self):
        """
        Returns a list of Execution Providers registered with the session.
        """
        if not all(model.providers == self.parts[0].providers for model in self.parts):
            logger.warning(
                "Calling `ORTParentMixin.providers` when the underlying parts have different values "
                "for `providers` is not recommended. The value of the first session will be returned."
            )
        return self.parts[0].providers

    @property
    def provider(self):
        """
        Returns the main Execution Provider registered with the session.
        """
        if not all(model.provider == self.parts[0].provider for model in self.parts):
            logger.warning(
                "Calling `ORTParentMixin.provider` when the underlying parts have different values "
                "for `provider` is not recommended. The value of the first session will be returned."
            )
        return self.parts[0].provider

    @property
    def provider_options(self):
        """
        Returns a dictionary of Execution Providers configurations/options.
        """
        if not all(model.provider_options == self.parts[0].provider_options for model in self.parts):
            logger.warning(
                "Calling `ORTParentMixin.provider_options` when the underlying parts have different values "
                "for `provider_options` is not recommended. The value of the first session will be returned."
            )
        return self.parts[0].provider_options

    @property
    def provider_option(self):
        """
        Returns the configuration/options of the main Execution Provider.
        """
        if not all(model.provider_option == self.parts[0].provider_option for model in self.parts):
            logger.warning(
                "Calling `ORTParentMixin.provider_option` when the underlying parts have different values "
                "for `provider_option` is not recommended. The value of the first session will be returned."
            )
        return self.parts[0].provider_option

    @property
    def device(self):
        """
        Returns the `torch.device` associated with the ONNX Runtime session.
        This device is inferred from the provider and provider options.
        """
        if not all(model.device == self.parts[0].device for model in self.parts):
            logger.warning(
                "Calling `ORTParentMixin.device` when the underlying parts have different values "
                "for `device` is not recommended. The value of the first session will be returned."
            )
        return self.parts[0].device

    @property
    def dtype(self):
        """
        Returns the `torch.dtype` associated with the ONNX Runtime session.
        This dtype is inferred from the input/output dtypes of the session.
        If no floating point type is found, it defaults to `torch.float32`.
        """
        if not all(model.dtype == self.parts[0].dtype for model in self.parts):
            logger.warning(
                "Calling `ORTParentMixin.dtype` when the underlying parts have different values "
                "for `dtype` is not recommended. The value of the first session will be returned."
            )
        return self.parts[0].dtype

    @property
    def use_io_binding(self):
        """
        Returns whether IO Binding is used or not.
        """
        if not all(model.use_io_binding == self.parts[0].use_io_binding for model in self.parts):
            logger.warning(
                "Calling `ORTParentMixin.use_io_binding` when the underlying parts have different values "
                "for `use_io_binding` is not recommended. The value of the first session will be returned."
            )
        return self.parts[0].use_io_binding

    @use_io_binding.setter
    def use_io_binding(self, value: bool):
        """
        Sets `use_io_binding` on all the underlying parts.
        """
        for model in self.parts:
            model.use_io_binding = value

    def to(self, *args, **kwargs):
        """
        Moves all parts to the specified device by updating the execution provider and its options.
        Args:
            device (`str`, `int`, `torch.device`):
                The device to move the sessions to. It can be a string (e.g., "cuda", "cpu"), an integer (e.g., 0 for GPU 0),
                or a `torch.device` object.
        Returns:
            `ORTParentMixin`: The updated wrapper.
        Raises:
            ValueError: If the device is not supported or if the provider is not available.
        """
        for model in self.parts:
            model.to(*args, **kwargs)

        return self
