in azure/datalake/store/multithread.py
def __init__(self, adlfs, rpath, lpath, nthreads=None, chunksize=2**28,
             buffersize=2**22, blocksize=2**22, client=None, run=True,
             overwrite=False, verbose=False, progress_callback=None,
             timeout=0):
    # Validate that the source exists and that the current user has access
    # to it. This only validates access to the top-level folder; if there
    # are files or folders underneath it that the user cannot access, the
    # download will fail on those files. We strip any wildcards from the
    # path before the check, and we always invalidate the cache so that the
    # existence check reflects the state of the store as close to the run
    # time of the transfer as possible. Because this is a distributed
    # filesystem, the path could still be deleted during execution, at
    # which point the transfer's behavior is non-deterministic, but it will
    # report an error.
    if not adlfs.exists(AzureDLPath(rpath).globless_prefix,
                        invalidate_cache=True):
        raise FileNotFoundError(
            'Data Lake item at path: {} either does not exist or the '
            'current user does not have permission to access '
            'it.'.format(rpath))
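    # Use the caller-supplied transfer client if one was provided;
    # otherwise build a non-chunked ADLTransferClient that fetches each
    # chunk via get_chunk.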
    if client:
        self.client = client
    else:
        self.client = ADLTransferClient(
            adlfs,
            transfer=get_chunk,
            nthreads=nthreads,
            chunksize=chunksize,
            buffersize=buffersize,
            blocksize=blocksize,
            chunked=False,
            verbose=verbose,
            parent=self,
            progress_callback=progress_callback,
            timeout=timeout)
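    # A deterministic identifier for this transfer, derived from the
    # filesystem, the source and destination paths, and the chunking
    # parameters.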
    self._name = tokenize(adlfs, rpath, lpath, chunksize, blocksize)
    self.rpath = rpath
    self.lpath = lpath
    self._overwrite = overwrite
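    # _setup enumerates the files to transfer and returns any destination
    # files that already exist; unless overwrite was requested, these block
    # the transfer.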
    existing_files = self._setup()
    if existing_files:
        raise FileExistsError(
            'Overwrite was not specified and the following files exist, '
            'blocking the transfer operation. Please specify overwrite to '
            'overwrite these files during transfer: '
            '{}'.format(','.join(existing_files)))
    if run:
        self.run()
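
# Example usage (an illustrative sketch, not part of the original module).
# The transfer=get_chunk and rpath-as-source arguments above suggest this is
# ADLDownloader.__init__; the tenant, client, secret, store name, and paths
# below are placeholders.
if __name__ == '__main__':
    from azure.datalake.store import core, lib

    # Authenticate with a service principal and open the store.
    token = lib.auth(tenant_id='my-tenant-id',
                     client_id='my-client-id',
                     client_secret='my-client-secret')
    adlfs = core.AzureDLFileSystem(token, store_name='my-store')

    # __init__ raises FileNotFoundError if the remote path is missing or
    # inaccessible, and FileExistsError when overwrite is False and local
    # files would be clobbered. With run=True (the default), the transfer
    # starts immediately.
    ADLDownloader(adlfs, rpath='/remote/data', lpath='./local-data',
                  nthreads=8, overwrite=True)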