def resolved_paths()

in smallpond/logical/dataset.py [0:0]


    def resolved_paths(self) -> List[str]:
        """
        Sorted list of file paths, with wildcard patterns expanded.

        Each entry of ``self.absolute_paths`` that contains glob wildcards is
        expanded with ``glob.glob`` (honoring ``self.recursive``); every other
        entry is kept as it is. The combined list is sorted, stored in
        ``self._resolved_paths`` on the first call and reused afterwards.

        Example::

            >>> DataSet(['data/100.parquet', '/datasetA/*.parquet']).resolved_paths
            ['/datasetA/1.parquet', '/datasetA/2.parquet', '/home/user/data/100.parquet']
        """
        # Fast path: a previous call already computed and cached the result.
        if self._resolved_paths is not None:
            return self._resolved_paths

        # Separate plain paths from patterns that still need globbing.
        collected = []
        patterns = []
        for candidate in self.absolute_paths:
            bucket = patterns if has_magic(candidate) else collected
            bucket.append(candidate)

        if patterns:
            if len(patterns) > 1:
                logger.debug(
                    "resolving {} paths with wildcards in {}",
                    len(patterns),
                    self,
                )

                def expand(pattern):
                    return glob.glob(pattern, recursive=self.recursive)

                # Expand several patterns concurrently, capped at 32 workers;
                # `map` yields the per-pattern results in input order.
                matches = []
                with ThreadPoolExecutor(min(32, len(patterns))) as executor:
                    for found in executor.map(expand, patterns):
                        matches.extend(found)
            else:
                # A single pattern is expanded inline, without a thread pool.
                matches = glob.glob(patterns[0], recursive=self.recursive)

            collected.extend(matches)
            logger.debug(
                "resolved {} files from {} wildcard path(s) in {}",
                len(matches),
                len(patterns),
                self,
            )

        collected.sort()
        self._resolved_paths = collected
        return self._resolved_paths