in storage/s3.py [0:0]
def get_by_lines(self, range_start: int) -> GetByLinesIterator:
    """
    Yield the log events of the S3 object, one tuple per event.

    The object's metadata is read with ``head_object`` and the whole object is
    then downloaded into an in-memory buffer. The first line of the buffer is
    passed to ``is_gzip_content``; when it reports gzip content the buffer is
    read from its beginning, otherwise from ``range_start``.

    :param range_start: offset requested by the caller. It is always forwarded
        unchanged to ``self._generate``, even when the buffer itself is
        rewound to 0 for gzip content.
    :return: a generator of ``(log_event, line_starting_offset,
        line_ending_offset, event_expanded_offset)`` tuples. Nothing is yielded
        (an info message is logged instead) when the effective start offset is
        not smaller than the object's ``ContentLength``.
    """
    # Keep the caller's offset: `range_start` may be reset below for gzip content.
    requested_offset: int = range_start

    head_response = self._s3_client.head_object(Bucket=self._bucket_name, Key=self._object_key)
    content_type: str = head_response["ContentType"]
    content_length: int = head_response["ContentLength"]

    shared_logger.debug(
        "get_by_lines",
        extra={
            "content_type": content_type,
            "range_start": range_start,
            "bucket_name": self._bucket_name,
            "object_key": self._object_key,
        },
    )

    # The full object is fetched into memory before any offset is applied.
    buffer: BytesIO = BytesIO(b"")
    self._s3_client.download_fileobj(self._bucket_name, self._object_key, buffer)
    buffer.flush()
    buffer.seek(0, SEEK_SET)

    # Gzip detection is based on the first line of the downloaded content.
    is_gzipped: bool = bool(is_gzip_content(buffer.readline()))
    if is_gzipped:
        # Gzip content is read from the start of the buffer; presumably
        # `_generate` applies the requested offset itself - confirm there.
        range_start = 0

    if range_start >= content_length:
        shared_logger.info(f"requested file content from {range_start}, file size {content_length}: skip it")
        return

    buffer.seek(range_start, SEEK_SET)

    events = self._generate(requested_offset, buffer, is_gzipped)
    for log_event, line_starting_offset, line_ending_offset, _, event_expanded_offset in events:
        # Narrow the type for static checkers: only bytes events are expected here.
        assert isinstance(log_event, bytes)
        yield log_event, line_starting_offset, line_ending_offset, event_expanded_offset