in 5-app-infra/3-artifact-publish/docker/cdmc/tag_engine_api/Resources.py [0:0]
def find_bq_resources(self, uris):
    """Resolve BigQuery resource URIs into a set of formatted resources.

    BQ resources are specified as:
        bigquery/project/<project>/dataset/<dataset>/<table>
    The table component may be omitted to address the dataset itself.
    The dataset component is expanded through ``self.get_datasets`` and a
    ``*`` wildcard is allowed in the table component. A four-component
    URI is treated as project level: every table of every dataset in the
    project is collected.

    Args:
        uris: comma-separated list of URIs, each representing a BQ resource.

    Returns:
        A set holding the output of ``self.format_dataset_resource`` for
        dataset-level URIs and ``self.format_table_resource`` for every
        table matched by project- or table-level URIs. ``None`` is returned
        as soon as a malformed URI is met (second component is not
        ``project``, fewer than three components, or more than six).
        A three-component URI is accepted but contributes no resources.
    """
    resources = set()
    table_resources = set()

    for uri in uris.split(","):
        print("uri: " + uri)
        split_path = uri.strip().split("/")
        path_length = len(split_path)

        # The length guard keeps the indexing below from raising IndexError
        # on an empty or truncated URI.
        if path_length < 3 or split_path[1] != "project":
            print("Error: invalid URI " + uri)
            return None

        # Reject over-long URIs up front, before any API call is made.
        if path_length > 6:
            print("Error. Invalid URI " + uri)
            return None

        project_id = split_path[2]

        if path_length == 4:
            print('uri ' + uri + ' is at the project level')
            for dataset in self.bq_client.list_datasets(project=project_id):
                # Qualify the dataset with the URI's project; a bare
                # dataset id would be resolved against the client's own
                # default project instead.
                qualified_dataset = project_id + "." + dataset.dataset_id
                for table in self.bq_client.list_tables(qualified_dataset):
                    table_resources.add(table.full_table_id)

        elif path_length > 4:
            for dataset_name in self.get_datasets(split_path[4]):
                dataset_id = project_id + "." + dataset_name
                print("path_length: ", path_length)
                print("dataset_id: " + dataset_id)

                if path_length == 5:
                    # Dataset-level URI: the dataset itself is the resource.
                    resources.add(self.format_dataset_resource(dataset_id))
                    continue

                table_expression = split_path[5]
                print("table_expression: " + table_expression)

                if "*" in table_expression:
                    # A lone "*" splits into empty substrings, which match
                    # every table, so it needs no separate branch.
                    # NOTE(review): substrings are searched anywhere in
                    # full_table_id (project and dataset part included) and
                    # in any order -- confirm this loose match is intended.
                    table_substrings = table_expression.split("*")
                    dataset_ref = self.bq_client.get_dataset(dataset_id)
                    for table in self.bq_client.list_tables(dataset_ref):
                        full_id = table.full_table_id
                        if all(sub in full_id for sub in table_substrings):
                            table_resources.add(full_id)
                else:
                    table_id = dataset_id + "." + table_expression
                    try:
                        table = self.bq_client.get_table(table_id)
                    except NotFound:
                        # Best effort: a missing table is reported and skipped.
                        print("Error: " + table_id + " not found.")
                    else:
                        table_resources.add(table.full_table_id)

    # Always emit the collected tables. Gating this on the kind of the last
    # URI dropped tables when a dataset-level URI came last, and failed when
    # no URI had set a kind at all.
    resources.update(
        self.format_table_resource(table) for table in table_resources
    )
    return resources