def find_gcs_resources()

in 5-app-infra/3-artifact-publish/docker/cdmc/tag_engine_api/Resources.py [0:0]


    def find_gcs_resources(self, uris):
        """Resolve a comma-separated list of GCS URIs into concrete objects.

        Each entry must have the form ``gs://<bucket>/<path>`` where
        ``<path>`` is one of:

        * ``*`` -- every object in the bucket
          (example: ``gs://discovery-area/*``);
        * ``<folder>/*`` -- every object whose name starts with ``<folder>/``
          (example: ``gs://discovery-area/cities_311/*``);
        * anything else -- a single object name, included only if it exists
          (example: ``gs://discovery-area/cities_311/austin.parquet``).

        When a wildcard is expanded, objects whose name ends with ``/``
        (folder placeholders) are skipped. Entries that lack the ``gs://``
        scheme, a bucket name, or an object path are reported on stdout and
        ignored.

        Args:
            uris: Comma-separated string of ``gs://`` URIs. Whitespace around
                each entry is ignored.

        Returns:
            A set of ``(bucket_name, blob_name)`` tuples.
        """
        scheme = 'gs://'
        wildcard_suffix = '/*'
        resources = set()

        for uri in uris.split(','):
            # Trim *before* removing the scheme: entries after a ", " separator
            # start with a space, and slicing a fixed 5 characters off the
            # untrimmed text would cut into the bucket name.
            trimmed = uri.strip()
            if not trimmed.startswith(scheme):
                print('Error: invalid uri provided: ' + uri)
                continue

            # Everything up to the first '/' is the bucket; the rest is the
            # object path (which may itself contain further '/' separators).
            bucket_name, _, object_path = trimmed[len(scheme):].partition('/')
            if not bucket_name or not object_path:
                print('Error: invalid uri provided: ' + uri)
                continue

            if object_path == '*':
                # Wildcard directly under the bucket: list everything.
                blobs = self.gcs_client.list_blobs(bucket_name)
            elif object_path.endswith(wildcard_suffix):
                folder = object_path[:-len(wildcard_suffix)]
                # The trailing '/' keeps the prefix match inside the folder;
                # without it 'cities_311' would also match 'cities_311_old/...'.
                blobs = self.gcs_client.list_blobs(bucket_name, prefix=folder + '/')
            else:
                # The uri names one specific object; keep it only if it exists.
                bucket = self.gcs_client.get_bucket(bucket_name)
                blob = bucket.blob(object_path)
                if blob.exists():
                    resources.add((bucket_name, blob.name))
                continue

            for blob in blobs:
                # Names ending in '/' are folder placeholders, not real files.
                if blob.name.endswith('/'):
                    continue
                resources.add((bucket_name, blob.name))

        return resources