in utils.py [0:0]
def __getitem__(self, index):
    """Load the raw bytes of the sample at ``index``.

    The URI at ``self.uris[index]`` is fetched from S3 when it starts with
    ``s3://`` and read from the local filesystem otherwise. Payloads that
    begin with the gzip magic number are decompressed before being returned.

    Args:
        index: Position used to look up ``self.uris`` and ``self.labels``.

    Returns:
        A ``(sample_bytes, uri, label)`` tuple: the (decompressed) content,
        the URI exactly as stored, and ``self.labels[index]`` converted to
        ``float``.
    """
    uri = self.uris[index]
    label = self.labels[index]

    if uri.startswith("s3://"):
        # The client is created on first use and cached on the instance so
        # later lookups reuse it.
        if self.s3_client is None:
            self.s3_client = boto3.Session().client('s3')
        # "s3://bucket/some/key" -> netloc is the bucket; the path carries a
        # leading "/" that is not part of the object key.
        uri_parsed = urlparse(uri, allow_fragments=False)
        bucket, key = uri_parsed.netloc, uri_parsed.path[1:]
        # RequestPayer='requester' is sent on every download, which is what
        # requester-pays buckets require.
        response = self.s3_client.get_object(
            Bucket=bucket, Key=key, RequestPayer='requester'
        )
        sample_bytes = response["Body"].read()
    else:
        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(uri, "rb") as sample_file:
            sample_bytes = sample_file.read()

    # 0x1f 0x8b is the gzip magic number; anything else is returned as-is.
    if sample_bytes.startswith(b"\x1f\x8b"):
        sample_bytes = gzip.decompress(sample_bytes)

    return sample_bytes, uri, float(label)