def parallel_read_tables()

in datasets/cloud_datasets/pipelines/_images/pdp_extract_tabular_metadata/script.py [0:0]


    def parallel_read_tables(self, full_table_ids: List[str]):
        """Fetch metadata for the given tables using a pool of worker threads.

        The id list is split into ``NUM_THREADS`` contiguous, near-equal
        chunks — ``divmod`` gives the base chunk size, and the first
        ``len(full_table_ids) % NUM_THREADS`` chunks each take one extra id
        so every table is covered.  One thread handles one chunk; all
        threads are started together and joined before returning.

        Args:
            full_table_ids: Fully-qualified table ids to read metadata for.
                An empty list results in no threads being created.
        """
        total = len(full_table_ids)
        base_size, extra = divmod(total, NUM_THREADS)
        workers = []
        start = 0
        while start < total:
            # The first `extra` chunks absorb the remainder, one id each.
            size = base_size + (1 if extra > 0 else 0)
            extra = max(extra - 1, 0)
            end = start + size
            worker = threading.Thread(
                target=self._read_tables_and_schema,
                args=(full_table_ids[start:end],),
            )
            workers.append(worker)
            start = end
        # Launch every worker first, then wait for all of them to finish.
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()