def _summarize

def _summarize_legacy()

in jobs/kpi-forecasting/kpi_forecasting/models/prophet_forecast.py [0:0]
65 lines of code
7 McCabe index (conditional complexity)

    def _summarize_legacy(self) -> pd.DataFrame:
        """
        Converts a `self.summary_df` to the legacy format used in
        `moz-fx-data-shared-prod.telemetry_derived.kpi_automated_forecast_confidences_v1`
        """
        # TODO: This method should be removed once the forecasting data model is updated:
        # https://mozilla-hub.atlassian.net/browse/DS-2676

        df = self.summary_df.copy(deep=True)

        # rename columns to legacy values
        df.rename(
            columns={
                "forecast_end_date": "asofdate",
                "submission_date": "date",
                "metric_alias": "target",
                "aggregation_period": "unit",
            },
            inplace=True,
        )
        df["forecast_date"] = df["forecast_predicted_at"].dt.date
        df["type"] = df["source"].replace("historical", "actual")
        df = df.replace(
            {
                "measure": {
                    "observed": "value",
                    "p05": "yhat_p5",
                    "p10": "yhat_p10",
                    "p20": "yhat_p20",
                    "p30": "yhat_p30",
                    "p40": "yhat_p40",
                    "p50": "yhat_p50",
                    "p60": "yhat_p60",
                    "p70": "yhat_p70",
                    "p80": "yhat_p80",
                    "p90": "yhat_p90",
                    "p95": "yhat_p95",
                },
                "target": {
                    "desktop_dau": "desktop",
                    "mobile_dau": "mobile",
                },
            }
        )

        # pivot the df from "long" to "wide" format
        index_columns = [
            "asofdate",
            "date",
            "target",
            "unit",
            "forecast_parameters",
            "forecast_date",
        ]
        df = (
            df[index_columns + ["measure", "value"]]
            .pivot(
                index=index_columns,
                columns="measure",
                values="value",
            )
            .reset_index()
        )

        # pivot sets the "name" attribute of the columns for some reason. It's
        # None by default, so we just reset that here.
        df.columns.name = None

        # When there's an overlap in the observed and forecasted period -- for
        # example, when a monthly forecast is generated mid-month -- the legacy
        # format only records the forecasted value, not the observed value. To
        # account for this, we'll just find the max of the "mean" (forecasted) and
        # "value" (observed) data. In all non-overlapping observed periods, the
        # forecasted value will be NULL. In all non-overlapping forecasted periods,
        # the observed value will be NULL. In overlapping periods, the forecasted
        # value will always be larger because it is the sum of the observed and forecasted
        # values. Below is a query that demonstrates the legacy behavior:
        #
        # SELECT *
        #   FROM `moz-fx-data-shared-prod.telemetry_derived.kpi_automated_forecast_confidences_v1`
        #  WHERE asofdate = "2023-12-31"
        #    AND target = "mobile"
        #    AND unit = "month"
        #    AND forecast_date = "2022-06-04"
        #    AND date BETWEEN "2022-05-01" AND "2022-06-01"
        #  ORDER BY date
        df["value"] = df[["mean", "value"]].max(axis=1)
        df.drop(columns=["mean"], inplace=True)

        # non-numeric columns are represented in the legacy bq schema as strings
        string_cols = [
            "asofdate",
            "date",
            "target",
            "unit",
            "forecast_parameters",
            "forecast_date",
        ]
        df[string_cols] = df[string_cols].astype(str)

        return df