in 5-app-infra/3-artifact-publish/docker/cdmc/tag_engine_api/TagEngineStoreHandler.py [0:0]
def generate_coverage_report(self, credentials):
    """Build tag-coverage reports for the configured BigQuery projects.

    Reads the coverage-report settings, walks every dataset and table of
    each included project, and looks each table up in the result of
    ``DataCatalogController.search_catalog`` for its dataset. The value
    found there is treated as the table's tag count.

    All three settings values are comma-separated strings. Entries are
    whitespace-trimmed and compared as whole identifiers, never as
    substrings, so excluding ``proj.sales`` does not also hide
    ``proj.sales_archive``. Excluded datasets are written as
    ``project.dataset``; excluded tables may be written either as
    ``project.dataset.table`` or as ``dataset.table``.

    Args:
        credentials: Passed through unchanged to
            ``controller.DataCatalogController``.

    Returns:
        A ``(summary_report, detailed_report)`` tuple.
        ``summary_report`` is a list of ``(qualified_dataset, tag_total)``
        tuples, one per non-excluded dataset. ``detailed_report`` is a
        parallel list of single-key dicts
        ``{qualified_dataset: [(table_name, tag_count), ...]}`` that lists
        only the tables present in the catalog search result. Both lists
        are empty when no settings are available or no project is
        configured.
    """
    summary_report = []
    detailed_report = []

    # The flag returned next to the settings is not relied on: an empty or
    # missing settings mapping is what actually makes the report impossible.
    _exists, settings = self.read_coverage_report_settings()
    if not settings:
        return summary_report, detailed_report

    # Missing keys are treated like empty values instead of raising.
    included_bigquery_projects = settings.get('included_bigquery_projects') or ''
    excluded_bigquery_datasets = settings.get('excluded_bigquery_datasets') or ''
    excluded_bigquery_tables = settings.get('excluded_bigquery_tables') or ''

    print(f'included_bigquery_projects: {included_bigquery_projects}')
    print(f'excluded_bigquery_datasets: {excluded_bigquery_datasets}')
    print(f'excluded_bigquery_tables: {excluded_bigquery_tables}')

    def split_csv(value):
        # Trim every entry and drop blanks (e.g. from a trailing comma).
        return [item.strip() for item in str(value).split(',') if item.strip()]

    # Sets give exact, whole-identifier membership tests.
    excluded_datasets = set(split_csv(excluded_bigquery_datasets))
    excluded_tables = set(split_csv(excluded_bigquery_tables))

    # list datasets and tables for chosen projects
    for project_id in split_csv(included_bigquery_projects):
        bq_client = bigquery.Client(project=project_id)

        for dataset in bq_client.list_datasets():
            dataset_id = dataset.dataset_id
            qualified_dataset = f'{project_id}.{dataset_id}'

            if qualified_dataset in excluded_datasets:
                continue

            print('dataset: ' + dataset_id)

            # NOTE(review): the controller is still created per dataset, as
            # before; whether one instance can be reused is not visible here.
            dcc = controller.DataCatalogController(credentials)
            linked_resources = dcc.search_catalog(project_id, dataset_id)
            print('linked_resources: ' + str(linked_resources))

            overall_sum = 0
            table_list = []

            for table in bq_client.list_tables(dataset_id):
                print('full_table_id: ' + str(table.full_table_id))

                # Use the table id reported by the API rather than slicing
                # full_table_id, which breaks when the project id itself
                # contains '.' or ':'.
                table_name = table.table_id
                table_path_full = (
                    f'{project_id}/datasets/{dataset_id}/tables/{table_name}'
                )
                table_path_short = f'{qualified_dataset}.{table_name}'

                print('table_path_full: ' + table_path_full)
                print('table_path_short: ' + table_path_short)
                print('table_name: ' + table_name)

                # Accept both fully qualified and project-relative entries.
                if (table_path_short in excluded_tables
                        or f'{dataset_id}.{table_name}' in excluded_tables):
                    print('skipping ' + table_path_short)
                    continue

                if table_name in linked_resources:
                    tag_count = linked_resources[table_name]
                    overall_sum += tag_count
                    print('tag_count = ' + str(tag_count))
                    print('overall_sum = ' + str(overall_sum))

                    # add the table name and tag count to a list
                    table_list.append((table_name, tag_count))

            # one summary record and one detail record per reported dataset
            summary_report.append((qualified_dataset, overall_sum))
            detailed_report.append({qualified_dataset: table_list})

    return summary_report, detailed_report