in experiments/notebooks/cloudstory-api/cloudstory.py [0:0]
def s3_select(bucket, key, statement):
    """Run an S3 Select SQL statement on a CSV object and return a DataFrame.

    The object is read as CSV with its first line used as the header, and
    the result is requested as JSON lines, which are parsed with pandas.
    Scan/process/return byte counts reported by the service are printed
    in megabytes.

    Relies on the module-level names ``s3`` and ``pd`` (presumably a boto3
    S3 client and pandas -- confirm against the rest of the file).

    Args:
        bucket: Name of the bucket holding the object.
        key: Key of the CSV object to query.
        statement: SQL expression passed to S3 Select.

    Returns:
        A DataFrame with one row per returned record; an empty DataFrame
        when the query returns no records.
    """
    import io

    s3_select_results = s3.select_object_content(
        Bucket=bucket,
        Key=key,
        Expression=statement,
        ExpressionType='SQL',
        InputSerialization={'CSV': {"FileHeaderInfo": "Use"}},
        OutputSerialization={'JSON': {}},
    )

    # The result arrives as a stream of events. A large result is delivered
    # as several 'Records' events, and a single record (or even a multi-byte
    # character) may be split across two of them, so the raw bytes are
    # collected first and decoded/parsed only once at the end.
    chunks = []
    for event in s3_select_results['Payload']:
        if 'Records' in event:
            chunks.append(event['Records']['Payload'])
        elif 'Stats' in event:
            details = event['Stats']['Details']
            print(f"Scanned: {int(details['BytesScanned'])/1024/1024:5.2f}MB")
            print(f"Processed: {int(details['BytesProcessed'])/1024/1024:5.2f}MB")
            print(f"Returned: {int(details['BytesReturned'])/1024/1024:5.2f}MB")

    payload = b''.join(chunks).decode('utf-8')
    if not payload.strip():
        # No matching rows: return an empty frame instead of failing on an
        # unbound result variable.
        return pd.DataFrame()
    return pd.read_json(io.StringIO(payload), lines=True)