def run_crawler()

in 07-module-feature-monitoring/feature_monitoring_utils.py [0:0]


def run_crawler(crawler: str, verbose: bool, *, timeout_minutes: int = 120, retry_seconds: int = 5) -> None:
    """Run the specified AWS Glue crawler, waiting until completion.

    Sends a start request through the boto3 Glue client, then polls the
    crawler's state until it reports ``READY``.

    Args:
        crawler: Name of the Glue crawler to start.
        verbose: When true, print every polled state plus start/finish
            messages (also sent to ``log``); otherwise print one progress
            dot per poll.
        timeout_minutes: Overall time budget in minutes. The clock starts
            just before the start request is sent.
        retry_seconds: Delay in seconds between consecutive state polls.

    Raises:
        RuntimeError: If the start request does not come back with HTTP 200.
        TimeoutError: If the crawler has not reached ``READY`` within
            ``timeout_minutes``.
    """
    # Ref: https://stackoverflow.com/a/66072347/
    client = boto3.client("glue")
    abort_time = timeit.default_timer() + timeout_minutes * 60

    def wait_until_ready() -> None:
        """Poll the crawler until it is READY or the time budget runs out."""
        state_previous = None
        while True:
            state = client.get_crawler(Name=crawler)["Crawler"]["State"]
            if verbose:
                print(f"current state= {state}")
            else:
                # Flush so the progress dots show up while we are waiting,
                # not all at once when the final newline is written.
                print(".", end="", flush=True)
            if state != state_previous:
                # Log only on transitions to keep the log free of poll noise.
                log.info(f"Crawler {crawler} is {state.lower()}.")
                state_previous = state
            if state == "READY":  # Other known states: RUNNING, STOPPING
                print("!\n")
                return
            remaining = abort_time - timeit.default_timer()
            if remaining <= 0:
                raise TimeoutError(
                    f"Failed to crawl {crawler}. The allocated time of {timeout_minutes:,} minutes has elapsed."
                )
            # Never sleep past the deadline, so the timeout is honoured
            # without overshooting by up to a full retry interval.
            time.sleep(min(retry_seconds, remaining))

    # NOTE(review): the crawler is not awaited before being started. Per the
    # Glue API docs, start_crawler presumably raises CrawlerRunningException
    # when the crawler is already running -- confirm this is the intent.
    print(f"Start crawling {crawler}.")
    response_start = client.start_crawler(Name=crawler)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    status_code = response_start["ResponseMetadata"]["HTTPStatusCode"]
    if status_code != 200:
        raise RuntimeError(f"Failed to start crawler {crawler}: HTTP status {status_code}.")
    if verbose:
        log.info(f"Crawling {crawler}.")
        print(f"Crawling {crawler}.")
    wait_until_ready()
    if verbose:
        log.info(f"Crawled {crawler}.")
        print(f"Crawled {crawler}.")