in jobs/kpi-forecasting/kpi_forecasting/results_processing.py [0:0]
def _extract_config_data(self) -> None:
    """Derive the input table id and dimension columns from ``self.config_data``.

    Walks the dictionary created by ``_load_config_data`` (its keys are reported
    as config file names in error messages, its values are the parsed configs),
    checks that every config agrees on the ``write_results`` section and on the
    ``metric_hub.segments`` section, and sets the attributes below:

        input_table_full: id of the input data table, built as
            ``"<project>.<dataset>.<table>"`` from the shared ``write_results``
            section
        dimension_list: indicates which columns of the input table represent
            dimensions, where different combinations of values specify
            separate forecasts (the keys of the shared ``segments`` section).
            If a forecast has no such columns, set to an empty list
        identifier_columns: the identifier columns already on the instance
            followed by the dimension columns, always stored as a list

    Raises:
        IndexError: Raised if ``self.config_data`` holds no configs at all
        Exception: Raised if list of config files have different values for the input table
        Exception: Raised if list of config files have different values for the dimension list
        KeyError: Raised if a config lacks ``metric_hub`` or ``write_results``, or if
            ``write_results`` lacks ``project``, ``dataset`` or ``table``
    """
    # Checked up front so that an empty config set yields an explanatory message
    # instead of "pop from empty list".  IndexError is kept as the exception
    # type so any existing handler keeps working.
    if not self.config_data:
        raise IndexError(
            "No config data loaded: cannot determine the input table or dimensions"
        )

    # only used in error messages
    config_file_list_string = " ".join(self.config_data.keys())

    segment_data_list = []
    input_table_list = []
    for config in self.config_data.values():
        # a config without a "segments" section contributes None
        segment_data_list.append(config["metric_hub"].get("segments"))
        # get input table info
        input_table_list.append(config["write_results"])

    # every config must describe the same table as the first one
    input_table_data, *other_input_tables = input_table_list
    if not all(input_table_data == el for el in other_input_tables):
        raise Exception(
            f"Input Table Data Does not all match for config list: {config_file_list_string}"
        )

    input_project = input_table_data["project"]
    input_dataset = input_table_data["dataset"]
    input_table = input_table_data["table"]
    input_table_full = f"{input_project}.{input_dataset}.{input_table}"

    # every config must describe the same segments as the first one
    segment_data, *other_segment_data = segment_data_list
    if not all(segment_data == el for el in other_segment_data):
        raise Exception(
            f"Dimension Data Does not all match for config list: {config_file_list_string}"
        )

    # when dimensions are present we only need the column names for the query;
    # a missing or empty segments section means there are no dimensions
    dimension_list = list(segment_data) if segment_data else []

    self.input_table_full = input_table_full
    self.dimension_list = dimension_list
    # need identifier columns to be a list to make it easy to do pandas operations later
    self.identifier_columns = [*self.identifier_columns, *dimension_list]