in azure/datalake/store/multithread.py [0:0]
# Module-level imports used by this excerpt (in the full module,
# AzureDLPath comes from the package's core module):
import glob
import os

from .core import AzureDLPath
def _setup(self):
    """ Build the set of (local file, remote file) transfer pairs.

    Walks or globs ``self.lpath``, pairs each local file with its remote
    destination under ``self.rpath``, submits new transfers to the client,
    and returns the remote paths skipped because they already exist and
    overwrite is disabled.
    """
    is_path_walk_empty = False
    if "*" not in self.lpath:
        lfiles = []
        for directory, subdir, fnames in os.walk(self.lpath):
            lfiles.extend([os.path.join(directory, f) for f in fnames])
            if not subdir and not fnames:
                # empty directory (no files, no subdirectories): track it
                # so it can be recreated on the remote side
                self.client._adlfs._emptyDirs.append(directory)
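        # The walk finds nothing when lpath is a single file rather than a
        # directory; fall back to uploading that one file.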
        if (not lfiles and os.path.exists(self.lpath) and
                not os.path.isdir(self.lpath)):
            lfiles = [self.lpath]
            is_path_walk_empty = True
    else:
        lfiles = glob.glob(self.lpath)
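
    # Map each discovered local file to a remote destination. For a
    # directory or glob upload, the directory structure below lpath's
    # glob-free prefix is preserved under rpath.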
    if lfiles and not is_path_walk_empty:
        local_rel_lpath = str(AzureDLPath(self.lpath).globless_prefix)
        file_pairs = [(f, self.rpath / AzureDLPath(f).relative_to(local_rel_lpath))
                      for f in lfiles]
    elif lfiles:
        # Single-file upload: drop the file into rpath if it is an existing
        # remote directory, otherwise treat rpath as the target file name.
        if self.client._adlfs.exists(self.rpath, invalidate_cache=True) and \
                self.client._adlfs.info(self.rpath, invalidate_cache=False)['type'] == "DIRECTORY":
            file_pairs = [(lfiles[0], self.rpath / AzureDLPath(lfiles[0]).name)]
        else:
            file_pairs = [(lfiles[0], self.rpath)]
    else:
        raise ValueError('No files to upload')
    # Kept for internal validation only; public callers should not rely on it.
    self._file_pairs = file_pairs
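
    # Submit each pair to the transfer client, skipping files that already
    # exist remotely when overwrite is disabled; those are reported back to
    # the caller.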
    existing_files = []
    for lfile, rfile in file_pairs:
        if not self._overwrite and self.client._adlfs.exists(rfile, invalidate_cache=False):
            existing_files.append(rfile.as_posix())
        else:
            fsize = os.stat(lfile).st_size
            self.client.submit(lfile, rfile, fsize)

    return existing_files
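
For context, `_setup` runs when an uploader is constructed through the public
API. A minimal sketch of that entry point, assuming a placeholder store name
('mystore') and interactive credentials (both hypothetical):

from azure.datalake.store import core, lib, multithread

# lib.auth() with no arguments typically starts an interactive login;
# 'mystore' is a placeholder Data Lake Store account name.
token = lib.auth()
adl = core.AzureDLFileSystem(token, store_name='mystore')

# 'localdir' contains no "*", so _setup walks it with os.walk, pairs each
# file with a path under '/remote/dir', and submits the transfers. With
# overwrite=False, files already present remotely are skipped.
multithread.ADLUploader(adl, lpath='localdir', rpath='/remote/dir',
                        nthreads=4, overwrite=False)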