#  Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
import typing
from pyflink.ml.wrapper import JavaWithParams
from pyflink.ml.param import StringParam, FloatParam
from pyflink.ml.common.param import HasFeaturesCol, HasLabelCol, HasOutputCol
from pyflink.ml.feature.common import JavaFeatureModel, JavaFeatureEstimator


class _UnivariateFeatureSelectorModelParams(JavaWithParams, HasFeaturesCol, HasOutputCol):
    """
    Params for :class:`UnivariateFeatureSelectorModel`.

    Combines the `HasFeaturesCol` and `HasOutputCol` mixins on top of `JavaWithParams`.
    """

    def __init__(self, java_params):
        # `java_params` is handed unchanged to the next initializer in the MRO.
        super().__init__(java_params)


class _UnivariateFeatureSelectorParams(HasLabelCol, _UnivariateFeatureSelectorModelParams):
    """
    Params for :class:`UnivariateFeatureSelector`.

    Extends the model params with the `HasLabelCol` mixin and declares four params: the
    feature type, the label type, the selection mode and the selection threshold. Each of
    them has a setter, a getter and a read-only property.
    """

    # Supported options of the feature type:
    #   - categorical: the features are categorical data.
    #   - continuous: the features are continuous data.
    FEATURE_TYPE: StringParam = StringParam("feature_type", "The feature type.", None)

    # Supported options of the label type:
    #   - categorical: the label is categorical data.
    #   - continuous: the label is continuous data.
    LABEL_TYPE: StringParam = StringParam("label_type", "The label type.", None)

    # Supported options of the feature selection mode:
    #   - numTopFeatures: chooses a fixed number of top features according to a hypothesis.
    #   - percentile: similar to numTopFeatures but chooses a fraction of all features
    #     instead of a fixed number.
    #   - fpr: chooses all features whose p-value are below a threshold, thus controlling
    #     the false positive rate of selection.
    #   - fdr: uses the Benjamini-Hochberg procedure to choose all features whose false
    #     discovery rate is below a threshold. See
    #     https://en.wikipedia.org/wiki/False_discovery_rate#Benjamini.E2.80.93Hochberg_procedure
    #   - fwe: chooses all features whose p-values are below a threshold. The threshold is
    #     scaled by 1/numFeatures, thus controlling the family-wise error rate of selection.
    SELECTION_MODE: StringParam = StringParam(
        "selection_mode", "The feature selection mode.", "numTopFeatures")

    # No default on the Python side; the description below states what a missing value is
    # replaced with at runtime for each selection mode.
    SELECTION_THRESHOLD: FloatParam = FloatParam(
        "selection_threshold",
        "The upper bound of the features that selector will select. If not set, it will be "
        "replaced with a meaningful value according to different selection modes at runtime. "
        "When the mode is numTopFeatures, it will be replaced with 50; when the mode is "
        "percentile, it will be replaced with 0.1; otherwise, it will be replaced with 0.05.",
        None)

    def __init__(self, java_params):
        # `java_params` is handed unchanged to the next initializer in the MRO.
        super().__init__(java_params)

    # ---- feature type -------------------------------------------------------------------

    def set_feature_type(self, value: str):
        """Set `FEATURE_TYPE` to `value` and return the result of `self.set`."""
        updated = self.set(self.FEATURE_TYPE, value)
        # `typing.cast` is a no-op at runtime; it only informs static type checkers.
        return typing.cast(_UnivariateFeatureSelectorParams, updated)

    def get_feature_type(self) -> str:
        """Return the value currently held for `FEATURE_TYPE`."""
        return self.get(self.FEATURE_TYPE)

    # ---- label type ---------------------------------------------------------------------

    def set_label_type(self, value: str):
        """Set `LABEL_TYPE` to `value` and return the result of `self.set`."""
        updated = self.set(self.LABEL_TYPE, value)
        return typing.cast(_UnivariateFeatureSelectorParams, updated)

    def get_label_type(self) -> str:
        """Return the value currently held for `LABEL_TYPE`."""
        return self.get(self.LABEL_TYPE)

    # ---- selection mode -----------------------------------------------------------------

    def set_selection_mode(self, value: str):
        """Set `SELECTION_MODE` to `value` and return the result of `self.set`."""
        updated = self.set(self.SELECTION_MODE, value)
        return typing.cast(_UnivariateFeatureSelectorParams, updated)

    def get_selection_mode(self) -> str:
        """Return the value currently held for `SELECTION_MODE`."""
        return self.get(self.SELECTION_MODE)

    # ---- selection threshold ------------------------------------------------------------

    def set_selection_threshold(self, value: float):
        """
        Set `SELECTION_THRESHOLD` and return the result of `self.set`.

        `value` is converted with `float()` first, so an integer such as `50` is stored as
        `50.0`.
        """
        threshold = float(value)
        updated = self.set(self.SELECTION_THRESHOLD, threshold)
        return typing.cast(_UnivariateFeatureSelectorParams, updated)

    def get_selection_threshold(self) -> float:
        """Return the value currently held for `SELECTION_THRESHOLD`."""
        return self.get(self.SELECTION_THRESHOLD)

    # ---- read-only property views over the getters --------------------------------------

    @property
    def feature_type(self):
        """Property form of :meth:`get_feature_type`."""
        return self.get_feature_type()

    @property
    def label_type(self):
        """Property form of :meth:`get_label_type`."""
        return self.get_label_type()

    @property
    def selection_mode(self):
        """Property form of :meth:`get_selection_mode`."""
        return self.get_selection_mode()

    @property
    def selection_threshold(self):
        """Property form of :meth:`get_selection_threshold`."""
        return self.get_selection_threshold()


class UnivariateFeatureSelectorModel(JavaFeatureModel, _UnivariateFeatureSelectorModelParams):
    """
    A Model which transforms data using the model data computed
    by :class:`UnivariateFeatureSelector`.

    The two classmethods below name the Java package and class this wrapper refers to.
    """

    def __init__(self, java_model=None):
        # `java_model` defaults to None and is handed unchanged to the next initializer
        # in the MRO.
        super().__init__(java_model)

    @classmethod
    def _java_model_package_name(cls) -> str:
        """Return the Java package name segment for this model."""
        return "univariatefeatureselector"

    @classmethod
    def _java_model_class_name(cls) -> str:
        """Return the simple name of the Java model class."""
        return "UnivariateFeatureSelectorModel"


class UnivariateFeatureSelector(JavaFeatureEstimator, _UnivariateFeatureSelectorParams):
    """
    An Estimator which selects features based on univariate statistical tests against labels.

    Currently, Flink supports three Univariate Feature Selectors: chi-squared, ANOVA F-test and
    F-value. The selector is chosen by setting the feature type and the label type (see
    `set_feature_type` and `set_label_type`); Flink picks the score function based on the
    specified combination.

    The following combinations of feature type and label type are supported:

    <ul>
        <li>feature type `categorical` and label type `categorical`: Flink uses chi-squared,
            i.e. chi2 in sklearn.
        <li>feature type `continuous` and label type `categorical`: Flink uses ANOVA F-test,
            i.e. f_classif in sklearn.
        <li>feature type `continuous` and label type `continuous`: Flink uses F-value,
            i.e. f_regression in sklearn.
    </ul>

    The `UnivariateFeatureSelector` supports different selection modes
    (see `set_selection_mode`):

    <ul>
        <li>numTopFeatures: chooses a fixed number of top features according to a hypothesis.
        <li>percentile: similar to numTopFeatures but chooses a fraction of all features
            instead of a fixed number.
        <li>fpr: chooses all features whose p-value are below a threshold, thus controlling
            the false positive rate of selection.
        <li>fdr: uses the <a href="https://en.wikipedia.org/wiki/False_discovery_rate#
            Benjamini.E2.80.93Hochberg_procedure">Benjamini-Hochberg procedure</a> to choose
            all features whose false discovery rate is below a threshold.
        <li>fwe: chooses all features whose p-values are below a threshold. The threshold is
            scaled by 1/numFeatures, thus controlling the family-wise error rate of selection.
    </ul>

    By default, the selection mode is `numTopFeatures`.
    """

    def __init__(self):
        # No arguments are passed on; the next initializer in the MRO is called bare.
        super().__init__()

    @classmethod
    def _create_model(cls, java_model) -> UnivariateFeatureSelectorModel:
        """Wrap `java_model` in a :class:`UnivariateFeatureSelectorModel`."""
        return UnivariateFeatureSelectorModel(java_model)

    @classmethod
    def _java_estimator_package_name(cls) -> str:
        """Return the Java package name segment for this estimator."""
        return "univariatefeatureselector"

    @classmethod
    def _java_estimator_class_name(cls) -> str:
        """Return the simple name of the Java estimator class."""
        return "UnivariateFeatureSelector"