in datasets/cloud_datasets/pipelines/_images/pdp_extract_tabular_metadata/script.py [0:0]
def read_datasets(self):
    """Gather dataset metadata and trigger the table metadata read.

    Lists every dataset exposed by ``self.client``, skips those whose id
    begins with an underscore, wraps each remaining dataset in a
    ``DatasetInfo`` (recording how many tables it holds) and appends it to
    ``self.datasets``. The ids of all tables found along the way are then
    passed in a single call to ``self.parallel_read_tables``.

    Returns nothing; progress and final counts are reported through
    ``logging.info``.
    """
    listed = list(self.client.list_datasets())
    logging.info("Enlisted Datasets: %s", len(listed))

    # Datasets whose id starts with "_" are not public, so leave them out.
    visible = (
        entry for entry in listed if not entry.dataset_id.startswith("_")
    )

    pending_table_ids = []
    for entry in visible:
        reference = self.client.get_dataset(entry.reference)
        info = DatasetInfo(entry, reference)

        tables = list(self.client.list_tables(reference))
        info.num_tables = len(tables)

        # Table ids are handed on with every ":" replaced by ".".
        dotted_ids = [tbl.full_table_id.replace(":", ".") for tbl in tables]
        pending_table_ids.extend(dotted_ids)

        self.datasets.append(info)

    self.parallel_read_tables(pending_table_ids)

    logging.info("Extracted Datasets: %s", len(self.datasets))
    logging.info("Extracted Tables: %s", len(self.tables))
    logging.info("Extracted Fields: %s", len(self.tables_fields))