in 07-module-feature-monitoring/feature_monitoring_utils.py [0:0]
def run_crawler(crawler: str, verbose: bool, *, timeout_minutes: int = 120, retry_seconds: int = 5) -> None:
    """Run the specified AWS Glue crawler, waiting until completion.

    Starts the crawler, then polls its state every ``retry_seconds`` seconds
    until it reports ``READY`` or the time budget is used up.

    Args:
        crawler: Name of the Glue crawler to start.
        verbose: When true, print the state seen on every poll plus
            start/finish messages; otherwise print one ``.`` per poll.
        timeout_minutes: Overall time budget in minutes, measured from the
            moment this function is entered (not from the start request).
        retry_seconds: Pause, in seconds, between two consecutive polls.

    Raises:
        AssertionError: If the start request does not report HTTP 200.
        TimeoutError: If the crawler is still not ``READY`` after the
            allocated time has elapsed.
    """
    # Ref: https://stackoverflow.com/a/66072347/
    client = boto3.client("glue")
    # Deadline is fixed up front so the start request counts against the budget.
    abort_time = timeit.default_timer() + timeout_minutes * 60

    def wait_until_ready() -> None:
        """Poll the crawler state until it is READY; raise on timeout."""
        state_previous = None
        if verbose:
            print(f'state_previous:= {state_previous}')
        while True:
            response_get = client.get_crawler(Name=crawler)
            state = response_get["Crawler"]["State"]
            if verbose:
                print(f'current state= {state}')
            else:
                # Flush so each progress dot appears immediately instead of
                # sitting in the stdout buffer until the final newline.
                print(".", end='', flush=True)
            # Log only on transitions to avoid one log line per poll.
            if state != state_previous:
                log.info(f"Crawler {crawler} is {state.lower()}.")
                state_previous = state
            if state == "READY":  # Other known states: RUNNING, STOPPING
                print("!\n")
                return
            if timeit.default_timer() > abort_time:
                raise TimeoutError(f"Failed to crawl {crawler}. The allocated time of {timeout_minutes:,} minutes has elapsed.")
            time.sleep(retry_seconds)

    # NOTE: the crawler is started right away; there is no preliminary wait
    # for it to become idle before the start request is issued.
    print(f"Start crawling {crawler}.")
    response_start = client.start_crawler(Name=crawler)
    status_code = response_start["ResponseMetadata"]["HTTPStatusCode"]
    if status_code != 200:
        # Raised explicitly rather than via `assert` so the check is not
        # stripped under `python -O`; the exception type is kept as-is for
        # any existing callers.
        raise AssertionError(f"Starting crawler {crawler} returned HTTP status {status_code}, expected 200.")
    if verbose:
        log.info(f"Crawling {crawler}.")
        print(f"Crawling {crawler}.")
    wait_until_ready()
    if verbose:
        log.info(f"Crawled {crawler}.")
        print(f"Crawled {crawler}.")