bigquery_etl/util/bigquery_tables.py (43 lines of code) (raw):

"""Helper methods to fetch BigQuery tables.""" from fnmatch import fnmatchcase from typing import List from google.cloud import bigquery def _uses_wildcards(pattern: str) -> bool: return bool(set("*?[]") & set(pattern)) def get_tables_matching_patterns( client: bigquery.Client, patterns: List[str] ) -> List[str]: """Get BigQuery tables matching the provided patterns.""" all_projects = None all_datasets = {} all_tables = {} matching_tables = [] for pattern in patterns: project, _, dataset_table = pattern.partition(":") dataset, _, table = dataset_table.partition(".") projects = [project or client.project] dataset = dataset or "*" table = table or "*" if _uses_wildcards(project): if all_projects is None: all_projects = [p.project_id for p in client.list_projects()] projects = [p for p in all_projects if fnmatchcase(project, p)] for project in projects: datasets = [dataset] if _uses_wildcards(dataset): if project not in all_datasets: all_datasets[project] = [ d.dataset_id for d in client.list_datasets(project) ] datasets = [d for d in all_datasets[project] if fnmatchcase(d, dataset)] for dataset in datasets: dataset = f"{project}.{dataset}" tables = [f"{dataset}.{table}"] if _uses_wildcards(table): if dataset not in all_tables: all_tables[dataset] = list(client.list_tables(dataset)) tables = [ f"{dataset}.{t.table_id}" for t in all_tables[dataset] if fnmatchcase(t.table_id, table) ] matching_tables += tables return matching_tables