in src/package/dataplexutils/metadata/wizard.py [0:0]
def generate_dataset_tables_descriptions(self, dataset_fqn, strategy="NAIVE", documentation_csv_uri=None):
    """Generates metadata on the tables of a whole dataset.

    The tables to process are listed from the dataset (using the
    regeneration-specific listing when the client options request
    regeneration) and then handled according to the chosen strategy.

    Args:
        dataset_fqn: The fully qualified name of the dataset
            (e.g., 'project.dataset')
        strategy: Key of constants["GENERATION_STRATEGY"] selecting which
            tables are processed and in which order. Defaults to "NAIVE".
        documentation_csv_uri: URI passed to self._get_tables_from_uri to
            obtain the documented tables. Required for the DOCUMENTED
            strategy; also read by DOCUMENTED_THEN_REST. Each returned row
            is indexed as row[0] (the table) and row[1] (forwarded as the
            second argument of generate_table_description; presumably the
            table's documentation reference -- TODO confirm).

    Returns:
        None.

    Raises:
        ValueError: If the strategy is not a known key, if DOCUMENTED is
            requested without a documentation URI, if a documented table is
            not part of the dataset, or if (in regeneration mode) a table
            that should be regenerated is missing from the documentation.
    """
    logger.info(f"Generating metadata for dataset {dataset_fqn}.")
    try:
        strategies = constants["GENERATION_STRATEGY"]
        logger.info(f"Strategy received: {strategy}")
        logger.info(f"Available strategies: {strategies}")
        # Validate strategy exists
        if strategy not in strategies:
            raise ValueError(f"Invalid strategy: {strategy}. Valid strategies are: {list(strategies.keys())}")
        int_strategy = strategies[strategy]
        logger.info(f"Strategy value: {int_strategy}")

        # Fail fast, before listing any table, when the documentation is missing.
        if int_strategy == strategies["DOCUMENTED"] and documentation_csv_uri is None:
            raise ValueError("A documentation URI is required for the DOCUMENTED strategy.")

        regenerate = self._client_options._regenerate
        if regenerate:
            tables = self._list_tables_in_dataset_for_regeneration(dataset_fqn)
        else:
            tables = self._list_tables_in_dataset(dataset_fqn)

        if int_strategy in (strategies["DOCUMENTED"], strategies["DOCUMENTED_THEN_REST"]):
            tables_from_uri = self._get_tables_from_uri(documentation_csv_uri)
            if not regenerate:
                for row in tables_from_uri:
                    # Compare the table entry (row[0]), not the whole row,
                    # against the tables listed from the dataset.
                    if row[0] not in tables:
                        raise ValueError(f"Table {row} not found in dataset {dataset_fqn}.")
                    self.generate_table_description(row[0], row[1])
            documented_tables = [row[0] for row in tables_from_uri]
            if regenerate:
                for table in tables:
                    if self._check_if_table_should_be_regenerated(table):
                        if table not in documented_tables:
                            raise ValueError(f"Table {table} not found in documentation")
                        # NOTE(review): unlike the non-regeneration path, the
                        # row's second element is not forwarded here --
                        # confirm this is intended.
                        self.generate_table_description(table)
            if int_strategy == strategies["DOCUMENTED_THEN_REST"]:
                # "Rest": every listed table that has no documentation row.
                for table in tables:
                    if table not in documented_tables:
                        self.generate_table_description(table)

        if int_strategy in (strategies["NAIVE"], strategies["RANDOM"], strategies["ALPHABETICAL"]):
            tables_sorted = self._order_tables_to_strategy(tables, int_strategy)
            for table in tables_sorted:
                self.generate_table_description(table)
    except Exception as e:
        logger.error(f"Exception: {e}.")
        # Bare raise re-raises the active exception without adding a frame.
        raise