in datasets/cloud_datasets/pipelines/_images/pdp_extract_tabular_metadata/script.py [0:0]
def parallel_read_tables(self, full_table_ids: List[str]):
    """Fan the given table ids out over worker threads and wait for them.

    The ids are cut into contiguous, non-empty slices whose sizes differ
    by at most one (for a positive ``NUM_THREADS`` this yields at most
    ``NUM_THREADS`` slices). Each slice is passed as the single argument
    to ``self._read_tables_and_schema`` running in its own
    ``threading.Thread``. The call returns once every thread has been
    joined.

    Args:
        full_table_ids: Table identifiers to distribute across threads.

    Returns:
        None. This method does not collect anything from the workers.

    NOTE(review): ``Thread.join()`` does not re-raise exceptions raised
    inside a worker, so failures in ``_read_tables_and_schema`` are not
    reported to the caller from here.
    """
    total = len(full_table_ids)
    # base_size ids per slice, with `extra` leftover ids to hand out.
    base_size, extra = divmod(total, NUM_THREADS)

    workers = []
    start = 0
    while start < total:
        # The leading slices each absorb one leftover id until none remain.
        if extra > 0:
            chunk_size = base_size + 1
            extra -= 1
        else:
            chunk_size = base_size
        stop = start + chunk_size
        workers.append(
            threading.Thread(
                target=self._read_tables_and_schema,
                args=(full_table_ids[start:stop],),
            )
        )
        start = stop

    # Start every worker before joining any, so the slices run concurrently.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()