in src/package/dataplexutils/metadata/wizard.py [0:0]
def _extract_column_info_from_table_profile(self,profile, column_name):
"""
Extract profile information for a specific column from the table profile JSON.
Args:
json_data (list): The JSON data containing table profile information
column_name (str): Name of the column to extract information for
Returns:
dict: Dictionary containing column profile information or None if column not found
"""
try:
# Get the fields from the first profile
if not profile or len(profile) == 0:
logger.info(f"No profile found for column {column_name}.")
return None
fields = profile[0]['profile']['fields']
# Find the matching column
for field in fields:
if field['name'] == column_name:
column_info = {
'name': field['name'],
'type': field['type'],
'mode': field['mode'],
'null_ratio': field['profile'].get('nullRatio', 0),
'distinct_ratio': field['profile'].get('distinctRatio', 0),
}
# Add type-specific profile information
if 'integerProfile' in field['profile']:
column_info.update({
'average': field['profile']['integerProfile'].get('average'),
'std_dev': field['profile']['integerProfile'].get('standardDeviation'),
'min': field['profile']['integerProfile'].get('min'),
'max': field['profile']['integerProfile'].get('max'),
'quartiles': field['profile']['integerProfile'].get('quartiles')
})
elif 'stringProfile' in field['profile']:
column_info.update({
'min_length': field['profile']['stringProfile'].get('minLength'),
'max_length': field['profile']['stringProfile'].get('maxLength'),
'avg_length': field['profile']['stringProfile'].get('averageLength')
})
elif 'doubleProfile' in field['profile']:
column_info.update({
'average': field['profile']['doubleProfile'].get('average'),
'std_dev': field['profile']['doubleProfile'].get('standardDeviation'),
'min': field['profile']['doubleProfile'].get('min'),
'max': field['profile']['doubleProfile'].get('max'),
'quartiles': field['profile']['doubleProfile'].get('quartiles')
})
# Add top N values if available
if 'topNValues' in field['profile']:
column_info['top_values'] = field['profile']['topNValues']
return column_info
return None
except Exception as e:
print(f"Error extracting column info: {str(e)}")
return None