def gen_benchmark_data()

in source/benchmark-sample/emr-benchmark.py [0:0]


def gen_benchmark_data(uri, scale, fs_proto, restore=False):
    """Run the TPC-DS setup script and record how long it took.

    If ``target/tpcds-gen-1.0-SNAPSHOT.jar`` is not present under
    ``__dir__``, the build prerequisites are installed with ``yum``, the
    ``tpcds-gen`` directory is built with ``make``, and its ``target/``
    directory is copied next to this script first.

    Args:
        uri: Passed through as the second argument to ``TPCDS_SETUP_BIN``.
        scale: Passed through as the first argument to ``TPCDS_SETUP_BIN``.
        fs_proto: Passed through as the third argument to
            ``TPCDS_SETUP_BIN``; also used in the result file name.
        restore: When truthy the setup script is invoked with ``RESTORE``,
            otherwise with ``LOAD``.

    The first element returned by ``elapse`` is written, as a string, to
    ``{TPCDS_SETUP_BIN}.{fs_proto}.{scale}.res``.
    NOTE(review): presumably that element is the elapsed time of the
    ``sh`` call -- confirm against ``elapse``.
    """
    jar_path = os.path.join(__dir__, 'target/tpcds-gen-1.0-SNAPSHOT.jar')
    if not os.path.isfile(jar_path):
        # The generator jar is missing: build it from the bundled sources.
        printH('compile target/tpcds-gen-1.0-SNAPSHOT.jar from source')
        build_script = f'''
            cd {__dir__}
            sudo yum -y install java-1.8.0-openjdk-devel maven git gcc make flex bison byacc curl unzip patch
            make -C tpcds-gen
            cp -rvf tpcds-gen/target/ .
        '''
        sh(build_script)

    printH(f'generating benchmark data {uri} {scale} {fs_proto}')

    if restore:
        mode = 'RESTORE'
    else:
        mode = 'LOAD'

    setup_command = f'{TPCDS_SETUP_BIN} {scale} {uri} {fs_proto} {mode}'
    elapsed, _ignored = elapse(sh, setup_command)

    # Persist the measurement next to the setup binary, keyed by protocol
    # and scale.
    result_file = f'{TPCDS_SETUP_BIN}.{fs_proto}.{scale}.res'
    fwrite(result_file, str(elapsed))