in api/streamlit_experiments/s3.py [0:0]
def s3_select():
    """Render a Streamlit form that runs an S3 Select query on a CSV object.

    Prompts for a bucket name, an object key and an SQL expression. When all
    three are provided, the query is executed through the module-level
    ``s3_client`` with the CSV header row used for column names and JSON
    lines as the output format. Scan statistics and the resulting rows are
    written to the page. If any input is missing, a hint is shown instead.
    """
    bucket = st.text_input('S3 bucket name', '')
    key = st.text_input('CSV File path and name', '')
    st.write("Example: `SELECT * FROM s3object s LIMIT 5`")
    sql = st.text_area('SQL statement', '')

    if not (bucket and key and sql):
        st.info('Provide S3 bucket, CSV file name, and SQL statement')
        return

    response = s3_client.select_object_content(
        Bucket=bucket,
        Key=key,
        Expression=sql,
        ExpressionType='SQL',
        InputSerialization={'CSV': {"FileHeaderInfo": "Use"}},
        OutputSerialization={'JSON': {}},
    )

    # The result may arrive as several Records events, and an event boundary
    # can fall in the middle of a record (or of a multi-byte character).
    # Collect the raw bytes and decode/parse only once the stream is done,
    # instead of parsing each event on its own and keeping just the last one.
    chunks = []
    for event in response['Payload']:
        if 'Records' in event:
            chunks.append(event['Records']['Payload'])
        elif 'Stats' in event:
            details = event['Stats']['Details']
            st.write(f"Scanned: {int(details['BytesScanned'])/1024/1024:5.2f}MB")
            st.write(f"Processed: {int(details['BytesProcessed'])/1024/1024:5.2f}MB")
            st.write(f"Returned: {int(details['BytesReturned'])/1024/1024:5.2f}MB")

    payload = b''.join(chunks).decode('utf-8')
    if payload.strip():
        df = pd.read_json(io.StringIO(payload), lines=True)
    else:
        # A query that matches nothing produces no Records payload; show an
        # empty frame rather than failing on an unbound variable.
        df = pd.DataFrame()
    st.write(df)