in source/benchmark-sample/emr-benchmark.py [0:0]
def gen_benchmark_data(uri, scale, fs_proto, restore=False):
    """Build the TPC-DS generator jar if it is missing, then run the setup binary.

    When ``target/tpcds-gen-1.0-SNAPSHOT.jar`` does not exist under ``__dir__``,
    a shell script is run that installs build packages with ``yum``, runs
    ``make -C tpcds-gen`` and copies ``tpcds-gen/target/`` into ``__dir__``.
    Afterwards ``TPCDS_SETUP_BIN`` is invoked through ``elapse`` and the first
    element of its result is written, as a string, to
    ``{TPCDS_SETUP_BIN}.{fs_proto}.{scale}.res``.

    Args:
        uri: Interpolated as the second argument of the setup command.
        scale: Interpolated as the first argument of the setup command and
            used in the result file name.
        fs_proto: Interpolated as the third argument of the setup command and
            used in the result file name.
        restore: When truthy the setup command is given mode ``RESTORE``,
            otherwise ``LOAD``.
    """
    jar_path = os.path.join(__dir__, 'target/tpcds-gen-1.0-SNAPSHOT.jar')
    if not os.path.isfile(jar_path):
        printH('compile target/tpcds-gen-1.0-SNAPSHOT.jar from source')
        # The script lines are deliberately flush-left: any indentation here
        # would become part of the text handed to sh().
        build_script = f'''
cd {__dir__}
sudo yum -y install java-1.8.0-openjdk-devel maven git gcc make flex bison byacc curl unzip patch
make -C tpcds-gen
cp -rvf tpcds-gen/target/ .
'''
        sh(build_script)

    printH(f'generating benchmark data {uri} {scale} {fs_proto}')

    if restore:
        mode = 'RESTORE'
    else:
        mode = 'LOAD'

    setup_cmd = f'{TPCDS_SETUP_BIN} {scale} {uri} {fs_proto} {mode}'
    # elapse presumably returns (elapsed time, call result) -- confirm against
    # its definition; only the first element is persisted.
    elapsed, _ = elapse(sh, setup_cmd)

    result_path = f'{TPCDS_SETUP_BIN}.{fs_proto}.{scale}.res'
    fwrite(result_path, str(elapsed))