in src/package/dataplexutils/metadata/wizard.py [0:0]
def generate_columns_descriptions(self, table_fqn, documentation_uri=None, human_comments=None):
    """Generate a description for every column of a table and store it.

    For each column in the table schema a prompt is built from the schema,
    a table sample and the optional profile, quality, lineage and
    human-comment context, and passed to `_llm_inference`. The resulting
    descriptions are written back with a single schema update.

    Args:
        table_fqn: The fully qualified name of the table
            (e.g., 'project.dataset.table').
        documentation_uri: Optional documentation URI forwarded to the
            inference call. An empty string is treated as None.
        human_comments: Optional comments injected into every column
            prompt. Replaced per column by the result of
            `_get_column_comment` when the `_use_human_comments` client
            option is set.

    Returns:
        None.

    Raises:
        NotFound: If the specified table does not exist.
        Exception: Any other error raised while generating or writing the
            descriptions is logged and re-raised unchanged.
    """
    try:
        logger.info(f"Generating metadata for columns in table {table_fqn}.")
        options = self._client_options
        self._table_exists(table_fqn)
        table_schema_str, table_schema = self._get_table_schema(table_fqn)
        table_sample = self._get_table_sample(
            table_fqn, constants["DATA"]["NUM_ROWS_TO_SAMPLE"]
        )

        # Get additional information
        table_quality = self._get_table_quality(
            options._use_data_quality, table_fqn
        )
        table_profile = self._get_table_profile(options._use_profile, table_fqn)

        # Lineage is best-effort: a failure here must not abort generation,
        # the prompt is simply built without that context.
        try:
            table_sources_info = self._get_table_sources_info(
                options._use_lineage_tables, table_fqn
            )
        except Exception as e:
            logger.error(f"Error getting table sources info for table {table_fqn}: {e}")
            table_sources_info = None
        try:
            job_sources_info = self._get_job_sources(
                options._use_lineage_processes, table_fqn
            )
        except Exception as e:
            logger.error(f"Error getting job sources info for table {table_fqn}: {e}")
            job_sources_info = None

        if documentation_uri == "":
            documentation_uri = None

        # Get prompt
        prompt_manager = PromptManager(PromtType.PROMPT_TYPE_COLUMN, options)
        column_description_prompt = prompt_manager.get_promtp()

        # We need to generate a new schema with the updated column
        # descriptions and then swap it
        updated_schema = []
        updated_columns = []
        for column in table_schema:
            column_info = self._extract_column_info_from_table_profile(
                table_profile, column.name
            )
            if options._use_human_comments:
                human_comments = self._get_column_comment(table_fqn, column.name)
            column_description_prompt_expanded = column_description_prompt.format(
                column_name=column.name,
                table_fqn=table_fqn,
                table_schema_str=table_schema_str,
                table_sample=table_sample,
                table_profile=column_info,
                table_quality=table_quality,
                table_sources_info=table_sources_info,
                job_sources_info=job_sources_info,
                human_comments=human_comments,
            )

            # Without the regenerate option every column is described; with
            # it, only the columns flagged for regeneration are. The flag is
            # treated as a plain boolean, matching the truthiness check used
            # for it after the schema update below.
            if not options._regenerate or self._check_if_column_should_be_regenerated(
                table_fqn, column.name
            ):
                column_description = self._llm_inference(
                    column_description_prompt_expanded,
                    documentation_uri=documentation_uri,
                )
                if options._add_ai_warning:
                    column_description = f"{constants['OUTPUT_CLAUSES']['AI_WARNING']}{column_description}"
                updated_schema.append(
                    self._get_updated_column(column, column_description)
                )
                updated_columns.append(column)
                logger.info(f"Generated column description: {column_description}.")
            else:
                # Keep the existing column definition untouched.
                updated_schema.append(column)
                logger.info(f"Column {column.name} will not be updated.")

        self._update_table_schema(table_fqn, updated_schema)

        if options._regenerate:
            for column in updated_columns:
                logger.info(f"Updating table {table_fqn} column {column.name} as regenerated")
                self._update_column_metadata_as_regenerated(table_fqn, column.name)
    except Exception:
        # Log with the traceback and re-raise the original exception. Calling
        # the caught instance (``raise e(...)``) would itself fail with a
        # TypeError and hide the real cause from callers.
        logger.error(
            f"Update of column description table {table_fqn} failed.",
            exc_info=True,
        )
        raise