def walk()

in petastorm/gcsfs_helpers/gcsfs_wrapper.py [0:0]


    def walk(self, path):
        """
        Directory tree generator, like os.walk

        Generator version of what is in gcsfs. Lists ``path`` with
        ``self.fs.ls(path, detail=True)``, yields one
        ``(dirpath, dirnames, filenames)`` tuple for it and then recurses into
        every entry whose ``storageClass`` is ``'DIRECTORY'``.

        :param path: Root location to walk. It is passed through
            ``_stringify_path`` and ``gcsfs.core.norm_path`` before listing.
        :return: Generator of ``(dirpath, dirnames, filenames)`` tuples.
            ``dirpath`` is the normalized path that was listed; ``dirnames``
            and ``filenames`` are sorted lists holding the last path component
            of each listed directory / file.
        """
        from gcsfs.core import norm_path
        # Keep the listed root in its own variable: the loop below must not
        # overwrite it, because it is what gets reported as ``dirpath``.
        root = norm_path(_stringify_path(path))
        directories = set()
        files = set()

        for key in self.fs.ls(root, detail=True):
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            name = key['name']
            storage_class = key['storageClass']
            if storage_class == 'DIRECTORY':
                directories.add(name[:-1] if name.endswith('/') else name)
            elif storage_class != 'BUCKET':
                # Entries reported as buckets are neither files nor
                # sub-directories of ``root``; everything else is a file.
                files.add(name)

        # A name may be listed both as a file and as a directory; the
        # directory entry wins.
        file_names = sorted([posixpath.split(f)[1] for f in files
                             if f not in directories])

        # Keep the full listed paths for the recursion (a bare base name would
        # lose the parent prefix) and derive the yielded base names from them,
        # ordered by base name as before.
        dir_paths = sorted(directories,
                           key=lambda d: posixpath.split(d)[1])
        dir_names = [posixpath.split(d)[1] for d in dir_paths]

        yield root, dir_names, file_names

        for dir_path in dir_paths:
            for tup in self.walk(dir_path):
                yield tup