def refute_estimate()

in dowhy/causal_refuters/placebo_treatment_refuter.py [0:0]
89 lines of code
11 McCabe index (conditional complexity)

    def refute_estimate(self):
        # only permute is supported for iv methods
        if self._target_estimand.identifier_method.startswith("iv"):
            if self._placebo_type != "permute":
                self.logger.error("Only placebo_type=''permute'' is supported for creating placebo for instrumental variable estimation methods")
                raise ValueError("Only placebo_type=''permute'' is supported for creating placebo for instrumental variable estimation methods.")

        # We need to change the identified estimand
        # We make a copy as a safety measure, we don't want to change the
        # original DataFrame
        identified_estimand = copy.deepcopy(self._target_estimand)
        identified_estimand.treatment_variable = ["placebo"]
        if self._target_estimand.identifier_method.startswith("iv"):
            identified_estimand.instrumental_variables = ["placebo_" + s for s in identified_estimand.instrumental_variables]
            # For IV methods, the estimating_instrument_names should also be
            # changed. So we change it inside the estimate and then restore it
            # back at the end of this method.
            if self._estimate.params["method_params"] is not None and "iv_instrument_name" in self._estimate.params["method_params"]:
                self._estimate.params["method_params"]["iv_instrument_name"] = \
                ["placebo_" + s for s in parse_state(self._estimate.params["method_params"]["iv_instrument_name"])]

        sample_estimates = np.zeros(self._num_simulations)
        self.logger.info("Refutation over {} simulated datasets of {} treatment"
                        .format(self._num_simulations
                        ,self._placebo_type)
                        )

        num_rows = self._data.shape[0]
        treatment_name = self._treatment_name[0] # Extract the name of the treatment variable
        type_dict = dict( self._data.dtypes )

        for index in range(self._num_simulations):

            if self._placebo_type == "permute":
                permuted_idx = None
                if self._random_state is None:
                    permuted_idx = np.random.choice(self._data.shape[0],
                            size=self._data.shape[0], replace=False)

                else:
                    permuted_idx = self._random_state.choice(self._data.shape[0],
                            size=self._data.shape[0], replace=False)
                new_treatment = self._data[self._treatment_name].iloc[permuted_idx].values
                if self._target_estimand.identifier_method.startswith("iv"):
                    new_instruments_values = self._data[self._estimate.estimator.estimating_instrument_names].iloc[permuted_idx].values
                    new_instruments_df = pd.DataFrame(new_instruments_values,
                            columns=["placebo_" + s for s in self._data[self._estimate.estimator.estimating_instrument_names].columns])
            else:
                if 'float' in type_dict[treatment_name].name :
                    self.logger.info("Using a Normal Distribution with Mean:{} and Variance:{}"
                                     .format(PlaceboTreatmentRefuter.DEFAULT_MEAN_OF_NORMAL
                                     ,PlaceboTreatmentRefuter.DEFAULT_STD_DEV_OF_NORMAL)
                                     )
                    new_treatment = np.random.randn(num_rows)*PlaceboTreatmentRefuter.DEFAULT_STD_DEV_OF_NORMAL + \
                                    PlaceboTreatmentRefuter.DEFAULT_MEAN_OF_NORMAL

                elif 'bool' in type_dict[treatment_name].name :
                    self.logger.info("Using a Binomial Distribution with {} trials and {} probability of success"
                                    .format(PlaceboTreatmentRefuter.DEFAULT_NUMBER_OF_TRIALS
                                    ,PlaceboTreatmentRefuter.DEFAULT_PROBABILITY_OF_BINOMIAL)
                                    )
                    new_treatment = np.random.binomial(PlaceboTreatmentRefuter.DEFAULT_NUMBER_OF_TRIALS,
                                                       PlaceboTreatmentRefuter.DEFAULT_PROBABILITY_OF_BINOMIAL,
                                                       num_rows).astype(bool)

                elif 'int' in type_dict[treatment_name].name :
                    self.logger.info("Using a Discrete Uniform Distribution lying between {} and {}"
                    .format(self._data[treatment_name].min()
                    ,self._data[treatment_name].max())
                    )
                    new_treatment = np.random.randint(low=self._data[treatment_name].min(),
                                                      high=self._data[treatment_name].max(),
                                                      size=num_rows)

                elif 'category' in type_dict[treatment_name].name :
                    categories = self._data[treatment_name].unique()
                    self.logger.info("Using a Discrete Uniform Distribution with the following categories:{}"
                    .format(categories))
                    sample = np.random.choice(categories, size=num_rows)
                    new_treatment = pd.Series(sample).astype('category')

            # Create a new column in the data by the name of placebo
            new_data = self._data.assign(placebo=new_treatment)
            if self._target_estimand.identifier_method.startswith("iv"):
                new_data = pd.concat((new_data, new_instruments_df), axis=1)
            # Sanity check the data
            self.logger.debug(new_data[0:10])
            new_estimator = CausalEstimator.get_estimator_object(new_data, identified_estimand, self._estimate)
            new_effect = new_estimator.estimate_effect()
            sample_estimates[index] = new_effect.value

        # Restoring the value of iv_instrument_name
        if self._target_estimand.identifier_method.startswith("iv"):
            if self._estimate.params["method_params"] is not None and "iv_instrument_name" in self._estimate.params["method_params"]:
                self._estimate.params["method_params"]["iv_instrument_name"] = \
                [s.replace("placebo_","",1) for s in parse_state(self._estimate.params["method_params"]["iv_instrument_name"])]
        refute = CausalRefutation(self._estimate.value,
                                  np.mean(sample_estimates),
                                  refutation_type="Refute: Use a Placebo Treatment")

        # Note: We hardcode the estimate value to ZERO as we want to check if it falls in the distribution of the refuter
        # Ideally we should expect that ZERO should fall in the distribution of the effect estimates as we have severed any causal
        # relationship between the treatment and the outcome.
        dummy_estimator = CausalEstimate(
                estimate=0,
                control_value=self._estimate.control_value,
                treatment_value=self._estimate.treatment_value,
                target_estimand=self._estimate.target_estimand,
                realized_estimand_expr=self._estimate.realized_estimand_expr)

        refute.add_significance_test_results(
            self.test_significance(dummy_estimator, sample_estimates)
        )
        refute.add_refuter(self)
        return refute