def _set_prediction

def _set_prediction_meta()

in orbit/forecaster/forecaster.py [0:0]
42 lines of code
12 McCabe index (conditional complexity)

    def _set_prediction_meta(self, df: pd.DataFrame) -> None:
        """Validate the prediction dataframe and record its metadata.

        Works on a copy of ``df`` and stores the collected metadata in
        ``self._prediction_meta``; nothing is returned.

        Parameters
        ----------
        df : pd.DataFrame
            Prediction input. Must contain the column named by ``self.date_col``.

        Raises
        ------
        ForecasterException
            If ``df`` has no rows, if the dates are rejected by
            ``is_ordered_datetime``, if the prediction starts before the training
            start, or if a prediction that starts within the training period does
            not start on one of the training dates.

        Warns
        -----
        UserWarning
            If ``is_even_gap_datetime`` reports unevenly spaced dates.
        """
        # remove reference from original input
        df = df.copy()

        date_series = df[self.date_col]
        n_rows = df.shape[0]
        if n_rows == 0:
            # Fail with a clear message instead of an IndexError from ``.iloc[0]``.
            raise ForecasterException("Prediction dataframe must not be empty.")

        prediction_dates = pd.to_datetime(date_series).reset_index(drop=True)
        # NOTE(review): start/end are taken from the raw column rather than the
        # parsed ``prediction_dates`` -- presumably to match how the training meta
        # stores them; confirm against the training-meta setter.
        prediction_start = date_series.iloc[0]
        prediction_end = date_series.iloc[-1]

        # get prediction df meta
        prediction_meta = {
            PredictionMetaKeys.DATE_ARRAY.value: prediction_dates,
            PredictionMetaKeys.PREDICTION_DF_LEN.value: n_rows,
            PredictionMetaKeys.START.value: prediction_start,
            PredictionMetaKeys.END.value: prediction_end,
        }

        if not is_ordered_datetime(prediction_dates):
            raise ForecasterException("Datetime index must be ordered and not repeat")

        if not is_even_gap_datetime(prediction_dates):
            warnings.warn("Datetime index is not evenly distributed")

        # TODO: validate that all regressor columns are present, if any

        if prediction_start < self._training_meta[TrainingMetaKeys.START.value]:
            raise ForecasterException("Prediction start must be after training start.")

        trained_len = self._training_meta[TrainingMetaKeys.NUM_OF_OBS.value]

        if prediction_start > self._training_meta[TrainingMetaKeys.END.value]:
            # Pure forecast: assume the prediction starts right after the train end.
            start_idx = trained_len
            end_idx = start_idx + n_rows
            n_forecast_steps = len(set(prediction_dates))
        else:
            # Prediction starts inside the training period: locate its first date
            # among the training dates to get the time index of the start.
            training_dates = pd.Index(
                self._training_meta[TrainingMetaKeys.DATE_ARRAY.value]
            )
            try:
                start_idx = training_dates.get_loc(prediction_start)
            except KeyError as err:
                raise ForecasterException(
                    "Prediction start must match a date in the training data "
                    "when it falls within the training period."
                ) from err
            end_idx = start_idx + n_rows
            # Negative or zero when the prediction range ends within training data.
            n_forecast_steps = end_idx - trained_len

        prediction_meta.update(
            {
                PredictionMetaKeys.START_INDEX.value: start_idx,
                PredictionMetaKeys.END_INDEX.value: end_idx,
                PredictionMetaKeys.FUTURE_STEPS.value: n_forecast_steps,
            }
        )

        self._prediction_meta = prediction_meta