def find_bq_resources()

in 5-app-infra/3-artifact-publish/docker/cdmc/tag_engine_api/Resources.py [0:0]


    def find_bq_resources(self, uris):
        """Resolve BigQuery resource URIs into a set of formatted resources.

        Each URI is specified as
        ``bigquery/project/<project>/dataset/<dataset>/<table>``.
        Wildcards (``*``) are allowed in the table and dataset components.

        * 4 path components: project level, every table of every dataset in
          the project is collected.
        * 5 path components: dataset level, each dataset returned by
          ``self.get_datasets`` is added via ``self.format_dataset_resource``.
        * 6 path components: table level, the table component is either an
          exact table name or a ``*`` glob matched against the table name.

        Args:
            uris: comma-separated list of URIs, each representing a BQ
                resource. Blank entries (e.g. from a trailing comma) are
                ignored.

        Returns:
            A set of formatted dataset/table resources, or ``None`` as soon
            as a malformed URI is encountered.
        """
        # Local import: the file's import block is not visible from here.
        import re

        resources = set()
        table_resources = set()

        for raw_uri in uris.split(","):
            uri = raw_uri.strip()

            if not uri:
                # Tolerate stray/trailing commas instead of crashing on them.
                continue

            print("uri: " + uri)
            split_path = uri.split("/")
            path_length = len(split_path)

            # Fewer than 4 components cannot name a project-level resource;
            # checking the length first also guards the index below.
            if path_length < 4 or split_path[1] != "project":
                print("Error: invalid URI " + uri)
                return None

            if path_length > 6:
                print("Error. Invalid URI " + uri)
                return None

            project_id = split_path[2]

            if path_length == 4:
                print('uri ' + uri + ' is at the project level')

                for dataset in self.bq_client.list_datasets(project=project_id):
                    # Qualify with the URI's project so the lookup does not
                    # silently fall back to the client's default project.
                    qualified_dataset = project_id + "." + dataset.dataset_id

                    for table in self.bq_client.list_tables(qualified_dataset):
                        table_resources.add(table.full_table_id)

                continue

            table_expression = None
            table_pattern = None

            if path_length == 6:
                table_expression = split_path[5]

                if "*" in table_expression:
                    # Translate the glob into an anchored regex so the
                    # fragments must appear in order, within the table name
                    # only (not the project or dataset part of the id).
                    table_pattern = re.compile(
                        ".*".join(map(re.escape, table_expression.split("*")))
                    )

            for dataset_name in self.get_datasets(split_path[4]):
                dataset_id = project_id + "." + dataset_name

                print("path_length: ", path_length)
                print("dataset_id: " + dataset_id)

                if path_length == 5:
                    resources.add(self.format_dataset_resource(dataset_id))
                    continue

                print("table_expression: " + table_expression)

                if table_pattern is not None:
                    tables = self.bq_client.list_tables(
                        self.bq_client.get_dataset(dataset_id)
                    )

                    for table in tables:
                        if table_pattern.fullmatch(table.table_id):
                            table_resources.add(table.full_table_id)

                else:
                    table_id = dataset_id + "." + table_expression

                    try:
                        table = self.bq_client.get_table(table_id)
                    except NotFound:
                        print("Error: " + table_id + " not found.")
                    else:
                        table_resources.add(table.full_table_id)

        # Tables are formatted once, after all URIs have been resolved.
        for full_table_id in table_resources:
            resources.add(self.format_table_resource(full_table_id))

        return resources