def _extract_config_data()

in jobs/kpi-forecasting/kpi_forecasting/results_processing.py [0:0]


    def _extract_config_data(self):
        """Derive the input table id and the dimension columns from the loaded configs.

        Reads ``self.config_data`` (the dictionary created by _load_config_data)
        and uses it to set the attributes below:
            input_table_full: id of the input data table, built as
                ``project.dataset.table`` from the ``write_results`` section
                of the configs
            dimension_list: indicates which columns of the input table represent
                dimensions (the keys of ``metric_hub.segments``), where different
                combinations of values specify separate forecasts.
                If a forecast has no such columns, set to an empty list
            identifier_columns: the existing ``self.identifier_columns`` converted
                to a list, with every dimension column that is not already
                present appended to it

        No attribute is modified unless all of the checks below pass.

        Raises:
            IndexError: Raised if ``self.config_data`` is empty
            Exception: Raised if list of config files have different values for the input table
            Exception: Raised if list of config files have different values for the dimension list
            KeyError: Raised if a config lacks ``metric_hub`` or ``write_results``,
                or if ``write_results`` lacks ``project``, ``dataset`` or ``table``
        """
        config_file_list = list(self.config_data)
        if not config_file_list:
            # Without this guard the failure is an opaque "pop from empty list".
            # IndexError is kept so any existing handler still matches.
            raise IndexError(
                "No config data loaded: cannot extract the input table or dimensions"
            )

        segment_data_list = []
        input_table_list = []
        for config_data in self.config_data.values():
            # .get yields None for configs that declare no segments
            segment_data_list.append(config_data["metric_hub"].get("segments"))
            input_table_list.append(config_data["write_results"])

        # every config must describe the same table as the first one
        input_table_data, *other_input_tables = input_table_list
        if any(other != input_table_data for other in other_input_tables):
            config_file_list_string = " ".join(config_file_list)
            raise Exception(
                f"Input Table Data Does not all match for config list: {config_file_list_string}"
            )

        input_project = input_table_data["project"]
        input_dataset = input_table_data["dataset"]
        input_table = input_table_data["table"]
        input_table_full = f"{input_project}.{input_dataset}.{input_table}"

        # every config must declare the same segments as the first one
        segment_data, *other_segments = segment_data_list
        if any(other != segment_data for other in other_segments):
            config_file_list_string = " ".join(config_file_list)
            raise Exception(
                f"Dimension Data Does not all match for config list: {config_file_list_string}"
            )

        # when dimensions are present we only need the column names for the query
        dimension_list = list(segment_data) if segment_data else []

        self.input_table_full = input_table_full
        self.dimension_list = dimension_list

        # need identifier columns to be a list to make it easy to do pandas operations later
        identifier_columns = list(self.identifier_columns)
        # Skip dimensions that are already listed so that running this method
        # again does not introduce duplicate identifier columns.
        identifier_columns.extend(
            column for column in dimension_list if column not in identifier_columns
        )
        self.identifier_columns = identifier_columns