in 5-app-infra/3-artifact-publish/docker/cdmc/tag_engine_api/Resources.py [0:0]
def find_gcs_resources(self, uris):
    """Resolve a comma-separated list of GCS URIs to the objects they name.

    Each entry must start with ``gs://`` and take one of these forms:

    * ``gs://bucket/*`` -- every object in the bucket
    * ``gs://bucket/folder/*`` -- every object whose name starts with
      ``folder/``
    * ``gs://bucket/path/to/file`` -- that single object, if it exists

    Objects whose name ends in ``/`` (folder placeholders) are never
    returned. Entries that lack the ``gs://`` scheme, a bucket name, or an
    object path are reported with ``print`` and skipped.

    Args:
        uris: Comma-separated string of GCS URIs. Whitespace around each
            entry is ignored.

    Returns:
        A set of ``(bucket_name, blob_name)`` tuples.
    """
    scheme = 'gs://'
    resources = set()

    for uri in uris.split(','):
        # Strip BEFORE removing the scheme: entries that follow ", " carry
        # leading whitespace, and slicing first would cut into the URI.
        cleaned = uri.strip()
        if not cleaned.startswith(scheme):
            print('Error: invalid uri provided: ' + uri)
            continue

        bucket_name, _, object_path = cleaned[len(scheme):].partition('/')
        if not bucket_name or not object_path:
            print('Error: invalid uri provided: ' + uri)
            continue

        # Wildcard: 'gs://bucket/*' or 'gs://bucket/folder/*'
        if object_path == '*' or object_path.endswith('/*'):
            # Dropping only the '*' keeps the trailing '/', so 'data/*'
            # lists 'data/...' but not 'data_old/...'. A bare '*' leaves an
            # empty prefix, i.e. the whole bucket.
            prefix = object_path[:-1] or None
            for blob in self.gcs_client.list_blobs(bucket_name, prefix=prefix):
                if not blob.name.endswith('/'):
                    resources.add((bucket_name, blob.name))
            continue

        # Single object, e.g. 'gs://bucket/folder/file.parquet'
        bucket = self.gcs_client.get_bucket(bucket_name)
        blob = bucket.blob(object_path)
        if blob.exists() and not blob.name.endswith('/'):
            resources.add((bucket_name, blob.name))

    return resources