in source/benchmark-sample/emr-benchmark.py [0:0]
def run_query(uri, scale, fs_proto, cleanup=False, gendata=True, restore=False, engine='spark'):
    """Prepare the benchmark environment and run the query suite for one engine.

    The steps run in this fixed order:

    1. When ``cleanup`` is truthy, call ``cleanup_benchmark_data``.
    2. When ``gendata`` is truthy, call ``gen_benchmark_data`` (printing a
       notice first if ``restore`` is also truthy).
    3. For ``engine == 'spark'`` run ``INIT_THRIFT_SERVER_BIN``; for any
       other engine run the Spark ``stop-thriftserver.sh`` script.
    4. Run ``RUN_QUERY_BIN`` against the parquet and orc databases whose
       names are derived from ``scale`` and ``fs_proto``.

    Args:
        uri: Passed to the data helpers and to the query runner's ``-s`` flag.
        scale: Interpolated into the database names and the banner.
        fs_proto: Interpolated into the database names and the banner.
        cleanup: Call ``cleanup_benchmark_data`` before anything else.
        gendata: Call ``gen_benchmark_data`` before running queries.
        restore: Forwarded to ``gen_benchmark_data``; also triggers a notice.
        engine: Engine name passed to the query runner's ``-g`` flag.
    """
    # NOTE(review): nesting was reconstructed from a listing without
    # indentation; it assumes gen_benchmark_data runs whenever `gendata` is
    # set and that the thrift server step always runs -- confirm.
    if cleanup:
        cleanup_benchmark_data(uri, scale, fs_proto)

    if gendata:
        if restore:
            print('Trying to restore database')
        gen_benchmark_data(uri, scale, fs_proto, restore)

    if engine != 'spark':
        # Any non-spark engine gets the thrift server stopped before the run.
        printH('stop thriftserver')
        sh('sudo /usr/lib/spark/sbin/stop-thriftserver.sh')
    else:
        printH('init thriftserver')
        sh(INIT_THRIFT_SERVER_BIN)

    banner = f'run {engine} queries {scale} {fs_proto}'
    printH(banner)

    # One invocation covers both the parquet and the orc database.
    query_cmd = f'{RUN_QUERY_BIN} -g {engine} -d tpcds_parquet_{scale}_{fs_proto},tpcds_orc_{scale}_{fs_proto} -s {uri}'
    sh(query_cmd)