def s3_select()

in experiments/notebooks/cloudstory-api/cloudstory.py [0:0]


def s3_select(bucket, key, statement):
    import io

    s3_select_results = s3.select_object_content(
        Bucket=bucket,
        Key=key,
        Expression=statement,
        ExpressionType='SQL',
        InputSerialization={'CSV': {"FileHeaderInfo": "Use"}},
        OutputSerialization={'JSON': {}},
    )

    for event in s3_select_results['Payload']:
        if 'Records' in event:
            df = pd.read_json(io.StringIO(event['Records']['Payload'].decode('utf-8')), lines=True)
        elif 'Stats' in event:
            print(f"Scanned: {int(event['Stats']['Details']['BytesScanned'])/1024/1024:5.2f}MB")            
            print(f"Processed: {int(event['Stats']['Details']['BytesProcessed'])/1024/1024:5.2f}MB")
            print(f"Returned: {int(event['Stats']['Details']['BytesReturned'])/1024/1024:5.2f}MB")
    return (df)