in utils/audio_upload.py [0:0]
def _GetGcsUris(bucket, project_id, impersonated_service_account, uri=True):
    """Lists the objects in a GCS bucket together with their metadata.

    Entries whose name ends in '/' are treated as directory placeholders and
    are left out of both returned lists.

    Args:
      bucket: The GCS bucket to list.
      project_id: The project ID (not number) used to create the storage
        client.
      impersonated_service_account: The service account to impersonate. It is
        handed to `_GetClientCredentials` to obtain the client credentials.
      uri: When truthy, each entry is the value produced by
        `_GetGcsUri(bucket, blob_name)`; otherwise each entry is the bare
        blob name.

    Returns:
      A `(uris, metadata)` tuple of two lists in matching order. `uris` holds
      one GCS uri (or blob name, see `uri`) per kept blob, and `metadata`
      holds the corresponding `blob.metadata` value for each of them.
    """
    client = storage.Client(
        project=project_id,
        credentials=_GetClientCredentials(impersonated_service_account),
    )

    locations = []
    blob_metadata = []
    for entry in client.list_blobs(bucket):
        name = entry.name
        # A trailing slash marks a directory path, not a real object.
        if name.endswith('/'):
            continue
        # NOTE: a size filter (`blob.size <= 1e7`) is currently disabled here;
        # the original note tied it to a redaction error on transcripts
        # larger than 0.5MB.
        locations.append(_GetGcsUri(bucket, name) if uri else name)
        blob_metadata.append(entry.metadata)
    return locations, blob_metadata