# s3_download_dir()
# in app/source/dragen/src/scheduler/aws_utils.py [0:0]


def s3_download_dir(bucket, src_dir, tgt_dir, region='us-east-1', nosign=False):
    """Download every S3 object under a prefix to a local directory, in parallel.

    Args:
        bucket: Name of the S3 bucket.
        src_dir: Key prefix ("directory") to download from.
        tgt_dir: Local target directory; object keys are appended to it.
        region: AWS region for the S3 client (default 'us-east-1').
        nosign: If True, use unsigned (anonymous) requests, e.g. for public buckets.

    Returns:
        Total number of bytes downloaded (0 if the prefix matches no objects).
    """
    if nosign:
        client = boto3.client('s3', region, config=Config(signature_version=UNSIGNED))
    else:
        client = boto3.client('s3', region)

    # list_objects returns at most 1000 keys per call; paginate so prefixes
    # with more objects are fully downloaded instead of silently truncated.
    # Note: 'Contents' is absent entirely (not empty) when a page has no keys,
    # so use .get() to avoid a KeyError on an empty listing.
    # Keys ending in '/' are "directory" placeholder objects — skip them.
    object_list = []
    paginator = client.get_paginator('list_objects')
    for page in paginator.paginate(Bucket=bucket, Prefix=src_dir):
        object_list.extend(
            x for x in page.get('Contents', []) if not x['Key'].endswith('/'))

    if not object_list:
        return 0

    # To avoid a race condition for parallel downloads, make sure each object's
    # full target directory path exists before any worker starts writing.
    for obj in object_list:
        tgt_path = tgt_dir.rstrip('/') + '/' + obj['Key']
        utils.check_create_dir(tgt_path.rsplit('/', 1)[0])

    # Convert the list of objects to dicts we can pass to the download function
    download_dict_list = [{
            'bucket': bucket,
            'obj_key': x['Key'],
            'tgt_path': tgt_dir.rstrip('/') + '/' + x['Key'],
            'region': region
        } for x in object_list]

    # Create a thread pool to divvy up the downloads; ensure it is always
    # closed and joined, even if a download raises.
    pool = Pool(DOWNLOAD_THREAD_COUNT)
    try:
        results = pool.map(s3_download_file, download_dict_list)
    finally:
        pool.close()
        pool.join()

    # Each worker returns the byte count it downloaded
    return sum(results)