in 07-module-feature-monitoring/feature_monitoring_utils.py [0:0]
def feature_monitoring_run(fg_name, verbose=True):
    """Run the DataBrew profile job for a feature group and load its report.

    Steps, in order:

    1. Look up the feature group's offline-store table name and derive the
       CTAS table name from it; if ``checkTableExists`` reports that table,
       call ``delete_fg_snapshot_ctas``, then call ``create_fg_snapshot_ctas``.
    2. Start the profile job named ``f'{fg_name}{databrew_profilejob_suffix}'``
       and hand the run to ``wait_until_job_ready``.
    3. Read the JSON report from the run's first output location in S3.
    4. Tag the feature group with the DataBrew console URL and the S3 URL of
       the report.

    Args:
        fg_name: Name of the feature group to profile.
        verbose: When true, print the job name, the report's S3 URL and the
            console URL. Also forwarded to the snapshot and wait helpers.

    Returns:
        A 5-tuple ``(brew_results_bucket, brew_results_key,
        databrew_profile_console_url, report_data, output_s3_file_url)``
        where ``report_data`` is the parsed JSON report.

    Raises:
        AssertionError: If ``start_job_run`` does not answer with HTTP 200.
    """
    # Instantiate an AWS Glue DataBrew client
    databrew = boto3.client(service_name='databrew', region_name=region)

    fg = FeatureGroup(name=fg_name, sagemaker_session=feature_store_session)
    table_name = fg.describe()['OfflineStoreConfig']['DataCatalogConfig']['TableName']
    table_name_ctas = f'{table_name}{ctas_table_suffix}'

    # Variables configuration
    databrew_dataset_name = f'{fg_name}{databrew_dataset_suffix}'
    databrew_profilejob_name = f'{fg_name}{databrew_profilejob_suffix}'

    # Check if the CTAS table exists, then delete it
    if checkTableExists(table_name_ctas):
        delete_fg_snapshot_ctas(fg_name, verbose)

    # Create the CTAS table (the helper's return value is not needed here)
    create_fg_snapshot_ctas(fg_name, verbose)

    print("Running DataBrew Profiling Job")

    # Start an execution of the profile job
    response_job_start = databrew.start_job_run(Name=databrew_profilejob_name)

    # Validate the start response before waiting on the run. An explicit
    # raise is used instead of an `assert` statement so the check survives
    # `python -O`; AssertionError is kept so callers see the same exception.
    status_code = response_job_start["ResponseMetadata"]["HTTPStatusCode"]
    if status_code != 200:
        raise AssertionError(
            f"start_job_run for {databrew_profilejob_name} returned "
            f"HTTP status {status_code}"
        )
    run_id = response_job_start["RunId"]

    # Wait until job completes
    wait_until_job_ready(databrew_profilejob_name, run_id, verbose)
    if verbose:
        print(f"Profiling {databrew_profilejob_name}.")

    # Preparing results: the report location is taken from the first output
    response_get = databrew.describe_job_run(
        Name=databrew_profilejob_name,
        RunId=run_id)
    report_location = response_get["Outputs"][0]['Location']
    brew_results_bucket = report_location['Bucket']
    brew_results_key = report_location['Key']
    output_s3_file_url = f's3://{brew_results_bucket}/{brew_results_key}'
    databrew_profile_console_url = f'https://{region}.console.aws.amazon.com/databrew/home?region={region}#dataset-details?dataset={databrew_dataset_name}&tab=profile-overview'
    if verbose:
        print(output_s3_file_url)
        print(databrew_profile_console_url)

    # Load Report JSON file into a Python dict object
    s3 = boto3.resource('s3')
    obj = s3.Object(brew_results_bucket, brew_results_key)
    report_data = json.load(obj.get()['Body'])

    # Add tags to the FG; the console URL is escaped with the module's
    # tag-character helper before being stored as a tag value
    feature_add_tag(fg_name, fg_profileurl_tag, Utils._escape_tag_chars(databrew_profile_console_url))
    feature_add_tag(fg_name, fg_profiles3link_tag, output_s3_file_url)

    return brew_results_bucket, brew_results_key, databrew_profile_console_url, report_data, output_s3_file_url