def s3_select()

in api/streamlit_experiments/s3.py [0:0]


def s3_select():
    """Render a Streamlit form that runs an S3 Select query on a CSV object.

    Prompts for a bucket name, an object key and a SQL statement. Once all
    three are non-empty, the query is sent through
    ``s3_client.select_object_content`` with CSV input (first row used as
    header) and JSON output. The returned records are shown as a DataFrame
    and the scanned/processed/returned byte counts from the ``Stats`` event
    are written in MB.

    When any of the three inputs is empty, an info hint is shown instead.
    When the query yields no record data, an info message is shown rather
    than a DataFrame.
    """
    bucket = st.text_input('S3 bucket name', '')
    csv_key = st.text_input('CSV File path and name', '')
    st.write("Example: `SELECT * FROM s3object s LIMIT 5`")
    sql = st.text_area('SQL statement', '')

    if not (bucket and csv_key and sql):
        st.info('Provide S3 bucket, CSV file name, and SQL statement')
        return

    s3_select_results = s3_client.select_object_content(
        Bucket=bucket,
        Key=csv_key,
        Expression=sql,
        ExpressionType='SQL',
        InputSerialization={'CSV': {"FileHeaderInfo": "Use"}},
        OutputSerialization={'JSON': {}},
    )

    # The payload is an event stream that may carry several 'Records'
    # events. Keep the raw bytes of every chunk and parse them together:
    # parsing chunk by chunk would keep only the last chunk and would break
    # if a JSON line or a multi-byte character straddles two events.
    record_chunks = []
    for event in s3_select_results['Payload']:
        if 'Records' in event:
            record_chunks.append(event['Records']['Payload'])
        elif 'Stats' in event:
            details = event['Stats']['Details']
            st.write(f"Scanned: {int(details['BytesScanned'])/1024/1024:5.2f}MB")
            st.write(f"Processed: {int(details['BytesProcessed'])/1024/1024:5.2f}MB")
            st.write(f"Returned: {int(details['BytesReturned'])/1024/1024:5.2f}MB")

    records_text = b''.join(record_chunks).decode('utf-8')
    if not records_text.strip():
        # No record data came back; there is nothing to build a frame from.
        st.info('Query returned no records')
        return

    df = pd.read_json(io.StringIO(records_text), lines=True)
    st.write(df)