in parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/DataGenerator.java [63:137]
/**
 * Invokes {@code generateData} once for each data file variant, in this order:
 * <ol>
 *   <li>{@code file_1M}: default block and page size, {@code UNCOMPRESSED};</li>
 *   <li>four {@code UNCOMPRESSED} variants pairing {@code BLOCK_SIZE_256M} / {@code BLOCK_SIZE_512M}
 *       with {@code PAGE_SIZE_4M} / {@code PAGE_SIZE_8M};</li>
 *   <li>{@code file_1M_SNAPPY} and {@code file_1M_GZIP}: default block and page size with the
 *       {@code SNAPPY} and {@code GZIP} codec respectively.</li>
 * </ol>
 * Every call passes the same {@code configuration}, {@code PARQUET_2_0},
 * {@code FIXED_LEN_BYTEARRAY_SIZE} and {@code ONE_MILLION} arguments.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised by {@code generateData};
 *     the first failure aborts the remaining calls
 */
public void generateAll() {
  try {
    // Baseline: default block/page size, no compression.
    generateData(file_1M, configuration, PARQUET_2_0,
        BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);

    // Block size / page size matrix (256M and 512M blocks, 4M and 8M pages), no compression.
    generateData(file_1M_BS256M_PS4M, configuration, PARQUET_2_0,
        BLOCK_SIZE_256M, PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);
    generateData(file_1M_BS256M_PS8M, configuration, PARQUET_2_0,
        BLOCK_SIZE_256M, PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);
    generateData(file_1M_BS512M_PS4M, configuration, PARQUET_2_0,
        BLOCK_SIZE_512M, PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);
    generateData(file_1M_BS512M_PS8M, configuration, PARQUET_2_0,
        BLOCK_SIZE_512M, PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);

    // Codec variants with default block/page size. The LZO call is currently disabled:
    // generateData(parquetFile_1M_LZO, configuration, PARQUET_2_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT,
    //     FIXED_LEN_BYTEARRAY_SIZE, LZO, ONE_MILLION);
    generateData(file_1M_SNAPPY, configuration, PARQUET_2_0,
        BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, SNAPPY, ONE_MILLION);
    generateData(file_1M_GZIP, configuration, PARQUET_2_0,
        BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, GZIP, ONE_MILLION);
  } catch (IOException ioFailure) {
    // Surface the checked I/O failure as unchecked, keeping the original exception as the cause.
    throw new RuntimeException(ioFailure);
  }
}