def get_folders_in_gcs_path_prefix()

in tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py [0:0]


def get_folders_in_gcs_path_prefix(gcs_client,
                                   bucket,
                                   prefix_path,
                                   recursive=True):
    """
    Collect the "folder" prefixes found beneath a path in a GCS bucket.

    Each lookup is a delimiter-based listing (``delimiter='/'``) restricted
    to a single prefix, so only that level of the bucket is listed rather
    than every object in the bucket. Approach inspired by:
    https://github.com/googleapis/google-cloud-python/issues/920#issuecomment-326125992

    :param gcs_client: client object exposing ``list_blobs(bucket, prefix=...,
        delimiter=...)`` whose result has a ``pages`` iterable, each page
        carrying a ``prefixes`` collection
    :param bucket: bucket passed through unchanged to ``list_blobs``
    :param prefix_path: path to search under; a trailing ``/`` is appended
        when it is missing. ``None`` and ``''`` are passed through as-is
    :param recursive: whether to also descend into every folder that is found
    :return: set of the prefix strings reported by the listing(s)
    """
    discovered = set()
    # Explicit work stack in place of recursion: every entry is a prefix
    # whose immediate sub-folders still have to be listed.
    pending = [prefix_path]
    while pending:
        current = pending.pop()
        # None and '' are left alone; anything else must end with the
        # delimiter so that the listing is scoped to that folder.
        if current and not current.endswith('/'):
            current = f"{current}/"
        listing = gcs_client.list_blobs(bucket, prefix=current, delimiter='/')
        # Folder names (the common prefixes) are exposed per page, so every
        # page has to be walked to gather them all.
        children = set()
        for page in listing.pages:
            children.update(page.prefixes)
        discovered |= children
        if recursive:
            # Queue the newly found folders so that their own sub-folders
            # get listed as well.
            pending.extend(children)
    return discovered