in tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py [0:0]
def get_folders_in_gcs_path_prefix(gcs_client,
                                   bucket,
                                   prefix_path,
                                   recursive=True):
    """
    List the "folders" (common object-name prefixes) under a GCS path.

    Uses prefix + delimiter filtering so that only objects below
    ``prefix_path`` are listed instead of every object in the bucket.
    Inspiration for this method came from:
    https://github.com/googleapis/google-cloud-python/issues/920#issuecomment-326125992

    The traversal is iterative (explicit worklist) and accumulates into a
    single set, so nesting depth is not limited by the interpreter's
    recursion limit and no per-level result sets are copied.

    :param gcs_client: object exposing
        ``list_blobs(bucket, prefix=..., delimiter=...)`` whose result has a
        ``pages`` iterable of objects with a ``prefixes`` attribute
        (presumably a google.cloud.storage.Client -- confirm against caller)
    :param bucket: bucket argument, passed through unchanged to
        ``list_blobs``
    :param prefix_path: path to search under. A trailing ``/`` is appended
        when missing; ``None`` and ``''`` are passed through as-is
    :param recursive: Whether to recursively search for folders
    :return: set of folder prefixes exactly as reported by the listing
        (values taken from ``page.prefixes``), not a list of URIs
    """
    folders = set()
    # Worklist of prefixes that still have to be listed.
    pending = [prefix_path]
    while pending:
        current = pending.pop()
        # Without the trailing delimiter "foo" would also match "foobar/".
        if current and not current.endswith('/'):
            current = f"{current}/"
        resp = gcs_client.list_blobs(bucket, prefix=current, delimiter='/')
        # The folder names (file prefixes) are only available while
        # iterating through the response pages.
        for page in resp.pages:
            for sub_prefix in page.prefixes:
                if sub_prefix in folders:
                    # Already recorded (and queued, if recursive).
                    continue
                folders.add(sub_prefix)
                if recursive:
                    # Check for folders within folders
                    pending.append(sub_prefix)
    return folders