in awswrangler/s3/_fs.py [0:0]
def _fetch(self, start: int, end: int) -> None:
    """Make sure the in-memory cache covers the byte range ``[start, end)``.

    ``self._cache`` is treated as holding the object's bytes for the half-open
    range ``[self._start, self._end)``. Depending on the request, this method
    either does nothing, downloads the whole object, downloads exactly the
    requested range, or downloads a block centred on the request while reusing
    whatever part of the current cache overlaps that block.

    Parameters
    ----------
    start : int
        First byte position required (inclusive). Must be ``>= 0``.
    end : int
        Byte position one past the last byte required (exclusive). Must be
        ``<= self._size``.

    Raises
    ------
    ValueError
        If ``end`` is greater than ``self._size`` or ``start`` is negative.
    RuntimeError
        If none of the cache-update cases matches (internal consistency guard).

    Notes
    -----
    ``self._fetch_range_proxy(a, b)`` is presumed to return the bytes of
    ``[a, b)`` -- its implementation is not visible here; confirm against it.
    The centred block is ``self._s3_block_size`` bytes long provided that
    ``self._s3_half_block_size == self._s3_block_size // 2`` (assumed, set
    outside this method).
    """
    if end > self._size:
        raise ValueError(f"Trying to fetch byte (at position {end - 1}) beyond file size ({self._size})")
    if start < 0:
        raise ValueError(f"Trying to fetch byte (at position {start}) beyond file range ({self._size})")

    if start >= self._start and end <= self._end:
        return None  # Does not require download

    if self._one_shot_download:
        # Download first and only then publish the new bounds: if the download
        # raises, the old (start, end, cache) triple stays consistent instead of
        # claiming the whole object is cached while the cache is stale.
        whole_object = self._fetch_range_proxy(0, self._size)
        self._start = 0
        self._end = self._size
        self._cache = whole_object
        return None

    if end - start >= self._s3_block_size:  # Fetching length greater than cache length
        self._cache = self._fetch_range_proxy(start, end)
        self._start = start
        self._end = end
        return None

    # Calculating block START and END positions
    _logger.debug("Downloading: %s (start) / %s (end)", start, end)
    # Integer form of ceil((start + (end - 1)) / 2); exact for any offset because
    # it never goes through a float.
    mid: int = (start + end) // 2
    new_block_start: int = mid - self._s3_half_block_size
    if self._s3_block_size % 2 == 0:
        # An even block size has no single central byte; shift by one so the
        # block length matches the block size rather than exceeding it by one.
        new_block_start += 1
    new_block_end: int = mid + self._s3_half_block_size + 1
    _logger.debug("new_block_start: %s / new_block_end: %s / mid: %s", new_block_start, new_block_end, mid)

    # Slide the block back inside [0, self._size], keeping its length when possible.
    if new_block_end > self._size:
        # Right overflow. This also covers "both ends overflowing": the shifted
        # start is then negative and gets clamped to 0.
        new_block_start = max(0, new_block_start - (new_block_end - self._size))
        new_block_end = self._size
    elif new_block_start < 0:
        # Left overflow: move the surplus to the right-hand side.
        new_block_end = min(self._size, new_block_end - new_block_start)
        new_block_start = 0
    _logger.debug(
        "new_block_start: %s / new_block_end: %s/ self._start: %s / self._end: %s",
        new_block_start,
        new_block_end,
        self._start,
        self._end,
    )

    # Calculating missing bytes in cache
    if (  # Full block download
        (new_block_start < self._start and new_block_end > self._end)
        or new_block_start > self._end
        or new_block_end < self._start
    ):
        self._cache = self._fetch_range_proxy(new_block_start, new_block_end)
    elif new_block_end > self._end:
        # Block extends past the cached range on the right: drop the leading
        # bytes that fall before the new block and append the missing tail.
        drop_len: int = new_block_start - self._start
        self._cache = self._cache[drop_len:] + self._fetch_range_proxy(self._end, new_block_end)
    elif new_block_start < self._start:
        # Block extends past the cached range on the left: prepend the missing
        # head and keep the cached bytes up to the new block end. The kept length
        # is expressed as a non-negative count because a negative-offset slice
        # (`[:new_block_end - self._end]`) degenerates to `[:0]` -- i.e. discards
        # the entire cache -- when the new block ends exactly at the cached end.
        keep_len: int = new_block_end - self._start
        self._cache = self._fetch_range_proxy(new_block_start, self._start) + self._cache[:keep_len]
    else:
        raise RuntimeError("AWSWrangler's cache calculation error.")
    self._start = new_block_start
    self._end = new_block_end
    return None