5-app-infra/3-artifact-publish/docker/cdmc/report_engine/BigQueryAPI.py (44 lines of code) (raw):

# Copyright 2023 Google, LLC. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from google.cloud import bigquery from google.cloud import datacatalog_v1 def getTableLocation(asset): table_id = extractTableId(asset) client = bigquery.Client() table = client.get_table(table_id) return table.location def getPolicyTagInfo(tag_name): client = datacatalog_v1.PolicyTagManagerClient() request = datacatalog_v1.GetPolicyTagRequest( name=tag_name, ) policy_tag = client.get_policy_tag(request=request) return policy_tag.display_name def getTablePolicyTagsDict(asset): table_dict = {} table_id = extractTableId(asset) client = bigquery.Client() table = client.get_table(table_id) for field in table.schema: if(field.policy_tags != None): table_dict[field.name.upper()]=getPolicyTagInfo(field.policy_tags.names[0]).upper() return table_dict def extractTableId(asset): projectId = asset[(asset.find( "/projects/" ) + len( "/projects/")):asset.find( "/datasets/" )] dataset = asset[(asset.find( "/datasets/" ) + len( "/projects/")):asset.find( "/tables/" )] table = asset[(asset.find( "/tables/" ) + len( "/tables/")):len(asset)+1] return (projectId + "." + dataset + "." + table) def queryTable(project_id, datasets, query_file): client = bigquery.Client() query = "" with open(query_file) as f: query = f.read() print(f"query=",query.replace("$project_id",project_id).format(*datasets.split(","))) return client.query(query.replace("$project_id",project_id).format(*datasets.split(","))) def getBQAssets(source_project_id,source_region): # Define the source and destination datasets client = bigquery.Client() # Define the query query_string = """ SELECT CONCAT(table_catalog,".",table_schema,".",table_name) as asset_name FROM `$project_id.region-$source_region.INFORMATION_SCHEMA.TABLES` """.replace("$project_id",source_project_id).replace("$source_region",source_region) # Execute the query resultset = client.query(query_string) result_list = [] for row in resultset: result_list.append(row["asset_name"]) return result_list