in bigquery_etl/copy_deduplicate.py [0:0]
def _list_live_tables(client, pool, project_id, only_tables, table_filter):
    """Return fully qualified ids of live tables that pass table_filter.

    Avoids BigQuery listing calls where possible: if only_tables is given
    and contains no glob patterns, the table ids are qualified directly;
    if only the dataset parts are glob-free, just those datasets are
    enumerated. Otherwise every ``*_live`` dataset in the project is
    listed, and their tables are fetched in parallel via pool.
    """
    if only_tables and not _contains_glob(only_tables):
        # No globs at all: qualify and filter the explicit list, no API calls.
        return [
            f"{project_id}.{table}" for table in only_tables if table_filter(table)
        ]
    if only_tables and not _contains_glob(_glob_dataset(table) for table in only_tables):
        # Dataset parts are glob-free: derive dataset ids without list_datasets.
        live_datasets = {
            f"{project_id}.{_glob_dataset(table)}" for table in only_tables
        }
    else:
        # Fall back to enumerating all *_live datasets in the project.
        live_datasets = [
            dataset.reference
            for dataset in client.list_datasets(project_id)
            if dataset.dataset_id.endswith("_live")
        ]
    # Fan out table listing across the pool, then flatten and filter.
    result = []
    for tables in pool.map(client.list_tables, live_datasets):
        for table in tables:
            qualified = f"{table.dataset_id}.{table.table_id}"
            if table_filter(qualified) and "beam_load_sink" not in table.table_id:
                result.append(sql_table_id(table))
    return result