def _get_local

def _get_local_path()

in iopath/common/s3.py [0:0]
40 lines of code
10 McCabe index (conditional complexity)

    def _get_local_path(self, path: str, **kwargs: Any) -> str:
        """
        Get a filepath which is compatible with native Python I/O such as `open`
        and `os.path`.
        If URI points to a remote resource, this function may download and cache
        the resource to local disk. In this case, the cache stays on filesystem
        (under `file_io.get_cache_dir()`) and will be used by a different run.
        Therefore this function is meant to be used with read-only resources.
        Args:
            path (str): A URI supported by this PathHandler
        Returns:
            local_path (str): a file path which exists on the local file system
        """
        logger = logging.getLogger(__name__)
        self._check_kwargs(kwargs)

        # Cheap check first.
        if path.endswith("/"):
            raise NotImplementedError(
                "S3PathHandler does not currently support downloading directories"
            )
        assert self._isfile(path)

        local_path = self._local_cache_path(path)
        with file_lock(local_path):
            if os.path.exists(local_path):
                # If local object's last modified time is *after* remote object's last modified
                # time, do not use the cache.  Instead, redownload.
                response = self._head_object(path)
                if response is not None:
                    remote_dt = response["LastModified"]
                    local_dt = dt.datetime.fromtimestamp(
                        os.path.getmtime(local_path)
                    ).astimezone()
                    # NOTE: may consider still avoid cache if times are close, to avoid a race condition.
                    # Currently, a lengthy download of a very recent but stale file would have a late
                    # local last modified timestamp, and would be improperly used.
                    # Better fix: set last modified time via the remote object's last modified time,
                    # in download_file().
                    if (local_dt - remote_dt) > dt.timedelta(minutes=0):
                        logger.info(
                            "URL {} was already cached in {}".format(path, local_path)
                        )
                        return local_path

            logger.info("Caching {} ...".format(path))
            tmp = local_path + ".tmp"
            # clean-up tmp if found, because if tmp exists, it must be a dirty
            # result of a previously process that didn't cleanup itself.
            if os.path.isfile(tmp):
                os.unlink(tmp)

            bucket, s3_path = self._parse_uri(path)
            client = self._get_client(bucket)
            try:
                response = client.download_file(
                    bucket, s3_path, tmp, Config=self.transfer_config
                )

                # First download to tmp, then move it, because move is
                # (almost?) atomic when src and dst are in the same file
                # system. This will avoid partial cache state if the
                # process is killed.
                shutil.move(tmp, local_path)
            finally:
                try:
                    os.unlink(tmp)
                except Exception:
                    pass

            logger.info("URL {} cached in {}".format(path, local_path))
            return local_path