def cleanup_benchmark_data()

in source/benchmark-sample/emr-benchmark.py [0:0]


def cleanup_benchmark_data(uri, scale, fs_proto):
    """Delete the TPC-DS data directories and drop the matching Hive databases.

    Three ``hadoop fs -rm -r -f`` commands are issued for the ``tpcds-orc``,
    ``tpcds-parquet`` and ``tpcds-generate`` directories located at
    ``{uri}/tmp/<dir>/{scale}``. A single ``hive -e`` command then drops the
    ``tpcds_parquet``, ``tpcds_orc`` and ``tpcds_text`` databases whose names
    end in ``_{scale}_{fs_proto}``.

    Every command is handed to the ``sh`` helper defined elsewhere in this
    file (presumably it runs the string in a shell -- confirm against its
    definition).

    Args:
        uri: Prefix placed in front of the ``/tmp/tpcds-*`` paths.
        scale: Value used as the last path component and inside the
            database names.
        fs_proto: Value used as the suffix of the database names.
    """
    printH(f'cleaning benchmark data {uri} {scale} {fs_proto}')

    # Same order as the individual commands: orc, parquet, generate.
    for data_dir in ('tpcds-orc', 'tpcds-parquet', 'tpcds-generate'):
        sh(f'hadoop fs -rm -r -f {uri}/tmp/{data_dir}/{scale}')

    # The script text keeps its eight-space indentation and trailing
    # whitespace so the string given to sh() is unchanged.
    pad = ' ' * 8
    drop_statements = ''.join(
        f'{pad}drop database if exists tpcds_{fmt}_{scale}_{fs_proto} cascade;\n'
        for fmt in ('parquet', 'orc', 'text')
    )
    # In a shell, "|| true" makes the overall command succeed even when the
    # hive invocation itself fails, so the drop step is best-effort.
    sh(f'{pad}hive -e "\n{drop_statements}{pad}" || true\n    ')