in 07-module-feature-monitoring/feature_monitoring_utils.py [0:0]
def create_crawler(database, table, verbose):
    """Return the AWS Glue crawler for *table*, creating it if it is missing.

    The crawler is named ``f'{table}-crawler'``. Existing crawlers are
    listed first; if one with that name is found its description is
    returned, otherwise a new crawler targeting the catalog table
    ``database.table`` is created.

    This function reads the names ``boto3``, ``region`` and ``role`` from the
    enclosing module scope (they are not defined in this function).

    Args:
        database: Name of the Glue database containing the table.
        table: Name of the Glue catalog table the crawler targets.
        verbose: When truthy, print each crawler name as it is inspected.

    Returns:
        A ``(response, crawler_name)`` tuple. ``response`` is the
        ``get_crawler`` response when the crawler already exists, and the
        ``create_crawler`` response when it was just created.
    """
    glue_crawler_name = f'{table}-crawler'
    glue_database_description = 'crawler to create new table with partitions'

    # A single region-pinned client is used for every call so the existence
    # check and the creation always target the same region.
    glue = boto3.client(service_name='glue', region_name=region,
                        endpoint_url=f'https://glue.{region}.amazonaws.com')

    # list_crawlers returns results in pages; follow NextToken so a crawler
    # that is not on the first page is still detected instead of triggering
    # a duplicate create attempt.
    list_kwargs = {}
    while True:
        page = glue.list_crawlers(**list_kwargs)
        for crawler_name in page["CrawlerNames"]:
            if verbose:
                print(crawler_name)
            if crawler_name == glue_crawler_name:
                response = glue.get_crawler(Name=crawler_name)
                return response, glue_crawler_name
        next_token = page.get("NextToken")
        if not next_token:
            break
        list_kwargs["NextToken"] = next_token

    # No crawler with this name exists yet: create it (more properties could
    # be externalised). No Schedule is passed, so the crawler is on demand.
    response = glue.create_crawler(
        Name=glue_crawler_name,
        Role=role,
        Description=glue_database_description,
        Targets={
            'CatalogTargets': [
                {
                    'DatabaseName': database,
                    'Tables': [
                        table,
                    ]
                },
            ]
        },
        SchemaChangePolicy={
            'UpdateBehavior': 'UPDATE_IN_DATABASE',
            'DeleteBehavior': 'LOG'
        }
    )
    return response, glue_crawler_name