in src/package/dataplexutils/metadata/wizard.py [0:0]
def _get_table_profile_quality(self, use_enabled, table_fqn):
"""Retrieves both profile and quality information for a BigQuery table.
Args:
use_enabled (bool): Whether profile/quality retrieval is enabled
table_fqn (str): The fully qualified name of the table
(e.g., 'project.dataset.table')
Returns:
dict: Dictionary containing:
- data_profile (list): Profile results
- data_quality (list): Quality results
Both will be empty lists if disabled/not available
Raises:
Exception: If there is an error retrieving the information
"""
try:
if use_enabled:
scan_client = self._cloud_clients[
constants["CLIENTS"]["DATAPLEX_DATA_SCAN"]
]
data_profile_results = []
data_quality_results = []
table_scan_references = self._get_table_scan_reference(table_fqn)
for table_scan_reference in table_scan_references:
if table_scan_reference:
for job in scan_client.list_data_scan_jobs(
ListDataScanJobsRequest(
parent=scan_client.get_data_scan(
GetDataScanRequest(name=table_scan_reference)
).name
)
):
job_result = scan_client.get_data_scan_job(
request=GetDataScanJobRequest(
name=job.name, view="FULL"
)
)
if job_result.state == DataScanJob.State.SUCCEEDED:
job_result_json = json.loads(
dataplex_v1.types.datascans.DataScanJob.to_json(
job_result
)
)
if "dataQualityResult" in job_result_json:
data_quality_results.append(
job_result_json["dataQualityResult"]
)
if "dataProfileResult" in job_result_json:
data_profile_results.append(
job_result_json["dataProfileResult"]
)
return {
"data_profile": data_profile_results,
"data_quality": data_quality_results,
}
else:
return {
"data_profile": [],
"data_quality": [],
}
except Exception as e:
logger.error(f"Exception: {e}.")
raise e