in petastorm/gcsfs_helpers/gcsfs_wrapper.py [0:0]
def walk(self, path):
    """
    Directory tree generator, like os.walk

    Generator version of what is in gcsfs. Lists ``path`` with
    ``self.fs.ls(path, detail=True)``, yields one
    ``(root, directories, files)`` tuple for it and then recurses into each
    sub-directory found in the listing.

    :param path: Directory to walk. It is passed through ``_stringify_path``
        and gcsfs ``norm_path`` before being listed.
    :return: Generator of ``(root, directories, files)`` tuples. ``root`` is
        the normalized path of the directory that was listed;
        ``directories`` and ``files`` are sorted lists of base names of its
        entries.
    """
    from gcsfs.core import norm_path
    # Keep the walked directory in its own variable: the listing loop below
    # must not overwrite it, since it is both yielded as the root and used
    # to build the paths of the sub-directories to recurse into.
    root = norm_path(_stringify_path(path))
    directories = set()
    files = set()

    for key in self.fs.ls(root, detail=True):
        # each info name must be at least [path]/part , but here
        # we check also for names like [path]/part/
        name = key['name']
        storage_class = key['storageClass']
        if storage_class == 'DIRECTORY':
            directories.add(name[:-1] if name.endswith('/') else name)
        elif storage_class == 'BUCKET':
            # Bucket entries are neither files nor sub-directories.
            continue
        else:
            files.add(name)

    # A name reported both as a directory and as a file counts as a
    # directory only.
    file_names = sorted(posixpath.split(f)[1] for f in files
                        if f not in directories)
    dir_names = sorted(posixpath.split(d)[1] for d in directories)

    yield root, dir_names, file_names

    for dir_name in dir_names:
        # Recurse with the full child path; a bare base name would be
        # resolved as a top-level path rather than a child of ``root``.
        for tup in self.walk(posixpath.join(root, dir_name)):
            yield tup