in src/package/dataplexutils/metadata/wizard.py [0:0]
def generate_table_description(self, table_fqn, documentation_uri=None, human_comments=None):
    """Generate a description for a table and publish or stage it.

    Collects the table schema and a data sample, then asks the quality,
    profile and lineage helpers for additional context (each helper
    receives the matching client-option flag). The table prompt template
    is filled with that context and sent to the LLM. The resulting
    description is either written to the table directly or, when the
    client options request staging for review, stored as a draft.

    Args:
        table_fqn: The fully qualified name of the table
            (e.g., 'project.dataset.table').
        documentation_uri: Optional value forwarded to the LLM inference
            call together with the prompt. An empty string is treated
            the same as ``None``.
        human_comments: Optional value substituted into the
            ``human_comments`` placeholder of the prompt template.

    Returns:
        The string "Table description generated successfully".

    Raises:
        NotFound: If the specified table does not exist (expected to be
            raised by ``_table_exists``, whose return value is not used
            here -- confirm against that helper).
    """
    logger.info(f"Generating metadata for table {table_fqn}.")
    self._table_exists(table_fqn)

    # Get base information
    logger.info(f"Getting schema for table {table_fqn}.")
    table_schema_str, _ = self._get_table_schema(table_fqn)
    logger.info(f"Getting sample for table {table_fqn}.")
    table_sample = self._get_table_sample(
        table_fqn, constants["DATA"]["NUM_ROWS_TO_SAMPLE"]
    )

    # Get additional information
    logger.info(f"Getting table quality for table {table_fqn}.")
    table_quality = self._get_table_quality(
        self._client_options._use_data_quality, table_fqn
    )
    logger.info(f"Getting table profile for table {table_fqn}.")
    table_profile = self._get_table_profile(
        self._client_options._use_profile, table_fqn
    )

    # Lineage lookups are best-effort: a failure is logged and the
    # corresponding prompt field is filled with None instead of aborting.
    try:
        logger.info(f"Getting source tables for table {table_fqn}.")
        table_sources_info = self._get_table_sources_info(
            self._client_options._use_lineage_tables, table_fqn
        )
    except Exception as e:
        logger.error(f"Error getting table sources info for table {table_fqn}: {e}")
        table_sources_info = None
    try:
        logger.info(f"Getting jobs calculating for table {table_fqn}.")
        job_sources_info = self._get_job_sources(
            self._client_options._use_lineage_processes, table_fqn
        )
    except Exception as e:
        logger.error(f"Error getting job sources info for table {table_fqn}: {e}")
        job_sources_info = None

    prompt_manager = PromptManager(
        PromtType.PROMPT_TYPE_TABLE, self._client_options
    )

    # Normalise "no documentation" so the inference call only ever sees
    # None or a non-empty value.
    if documentation_uri == "":
        documentation_uri = None

    # Get prompt
    table_description_prompt = prompt_manager.get_promtp()
    # Format prompt
    table_description_prompt_expanded = table_description_prompt.format(
        table_fqn=table_fqn,
        table_schema_str=table_schema_str,
        table_sample=table_sample,
        table_profile=table_profile,
        table_quality=table_quality,
        table_sources_info=table_sources_info,
        job_sources_info=job_sources_info,
        human_comments=human_comments,
    )
    table_description = self._llm_inference(
        table_description_prompt_expanded, documentation_uri
    )

    # Prefix the configured AI warning clause when the option is enabled.
    if self._client_options._add_ai_warning:
        table_description = f"{constants['OUTPUT_CLAUSES']['AI_WARNING']}{table_description}"

    # Update table
    if not self._client_options._stage_for_review:
        self._update_table_bq_description(table_fqn, table_description)
        if self._client_options._persist_to_dataplex_catalog:
            self._update_table_dataplex_description(table_fqn, table_description)
            logger.info(f"Table description updated for table {table_fqn} in Dataplex catalog")
    else:
        # Staged for review: the description is saved as a draft, which
        # needs the aspect type to exist first; BigQuery is left untouched.
        if not self._check_if_exists_aspect_type(constants["ASPECT_TEMPLATE"]["name"]):
            self._create_aspect_type(constants["ASPECT_TEMPLATE"]["name"])
        self._update_table_draft_description(table_fqn, table_description)
        logger.info(f"Table {table_fqn} will not be updated in BigQuery.")

    return "Table description generated successfully"