def generate_dataset_tables_columns_descriptions()

in src/package/dataplexutils/metadata/wizard.py [0:0]


    def generate_dataset_tables_columns_descriptions(self, dataset_fqn, strategy="NAIVE", documentation_csv_uri=None):
        """Generate table and column descriptions for the tables of a dataset.

        The ``strategy`` name is resolved through
        ``constants["GENERATION_STRATEGY"]`` and selects which tables are
        processed:

        * ``NAIVE`` / ``RANDOM`` / ``ALPHABETICAL``: every table returned by
          ``_list_tables_in_dataset``, ordered by ``_order_tables_to_strategy``.
          Table and column descriptions are generated unconditionally.
        * ``DOCUMENTED``: only the tables listed at ``documentation_csv_uri``.
        * ``DOCUMENTED_THEN_REST``: the listed tables first, then every
          remaining table of the dataset.

        For the two documented strategies the table description is skipped
        when ``self._client_options._regenerate`` is set and
        ``_check_if_table_should_be_regenerated`` returns a falsy value for
        that table. Column generation is always invoked for those strategies.

        Args:
            dataset_fqn: The fully qualified name of the dataset
                (e.g., 'project.dataset').
            strategy: Name of the generation strategy; must be a key of
                ``constants["GENERATION_STRATEGY"]``.
            documentation_csv_uri: Location passed to ``_get_tables_from_uri``.
                Required for the ``DOCUMENTED`` and ``DOCUMENTED_THEN_REST``
                strategies. Each returned entry is read as
                ``entry[0]`` = table name and ``entry[1]`` = second argument
                for the generators (presumably a documentation URI; confirm
                against ``_get_tables_from_uri``).

        Returns:
            None.

        Raises:
            ValueError: If ``strategy`` is unknown, if a documented strategy
                is requested without ``documentation_csv_uri``, or if a listed
                table is not part of the dataset.
            Exception: Any error raised by the helper methods is logged and
                re-raised unchanged.
        """
        logger.info(f"Generating metadata for dataset {dataset_fqn}.")
        try:
            strategies = constants["GENERATION_STRATEGY"]
            logger.info(f"Strategy received: {strategy}")
            logger.info(f"Available strategies: {strategies}")

            # Validate strategy exists
            if strategy not in strategies:
                raise ValueError(f"Invalid strategy: {strategy}. Valid strategies are: {list(strategies.keys())}")

            int_strategy = strategies[strategy]
            logger.info(f"Strategy value: {int_strategy}")

            # Both documented strategies read the CSV, so both need the URI.
            documented_strategies = (
                strategies["DOCUMENTED"],
                strategies["DOCUMENTED_THEN_REST"],
            )
            if int_strategy in documented_strategies and documentation_csv_uri is None:
                raise ValueError(f"A documentation URI is required for the {strategy} strategy.")

            tables = self._list_tables_in_dataset(dataset_fqn)

            def should_generate_table_description(table_fqn):
                # Without the regenerate option every table is described;
                # with it, only tables flagged for regeneration are. The
                # check is short-circuited so it only runs in regenerate mode.
                return (
                    not self._client_options._regenerate
                    or self._check_if_table_should_be_regenerated(table_fqn)
                )

            if int_strategy in documented_strategies:
                documented_table_names = set()
                for entry in self._get_tables_from_uri(documentation_csv_uri):
                    table_fqn, table_documentation = entry[0], entry[1]
                    # Compare the table name, not the whole CSV entry.
                    if table_fqn not in tables:
                        raise ValueError(f"Table {table_fqn} not found in dataset {dataset_fqn}.")
                    documented_table_names.add(table_fqn)

                    if should_generate_table_description(table_fqn):
                        self.generate_table_description(table_fqn, table_documentation)

                    # Always call column generation: the check for columns
                    # to be regenerated is done per column by the callee.
                    self.generate_columns_descriptions(table_fqn, table_documentation)

                if int_strategy == strategies["DOCUMENTED_THEN_REST"]:
                    # Remaining tables have no documentation entry, so the
                    # generators are called with the table name only.
                    for table_fqn in tables:
                        if table_fqn in documented_table_names:
                            continue
                        if should_generate_table_description(table_fqn):
                            self.generate_table_description(table_fqn)
                        self.generate_columns_descriptions(table_fqn)

            if int_strategy in [strategies["NAIVE"], strategies["RANDOM"], strategies["ALPHABETICAL"]]:
                tables_sorted = self._order_tables_to_strategy(tables, int_strategy)
                for table in tables_sorted:
                    self.generate_table_description(table)
                    self.generate_columns_descriptions(table)

        except Exception as e:
            logger.error(f"Exception: {e}.")
            # Bare raise keeps the original traceback intact.
            raise