in c3r-sdk-parquet/src/main/java/com/amazonaws/c3r/action/ParquetRowMarshaller.java [78:106]
private static RowMarshaller<ParquetValue> newInstance(
        @NonNull final String sourceFile,
        @NonNull final String targetFile,
        @NonNull final String tempDir,
        @NonNull final ClientSettings settings,
        @NonNull final TableSchema schema,
        @NonNull final Map<ColumnType, Transformer> transforms,
        final Boolean binaryAsString) {
    // Assembles a RowMarshaller that reads rows from the Parquet file `sourceFile`
    // and writes them to the Parquet file `targetFile`.
    //
    // `binaryAsString` is the only argument not marked @NonNull; it is handed to the
    // reader builder unchanged (how a null value is treated is decided by the reader).

    // A schema that reports positional column headers is rejected up front.
    if (schema.getPositionalColumnHeaders() != null) {
        throw new C3rIllegalArgumentException("Parquet files require a mapped table schema.");
    }

    // Reader over the source file; it also supplies the source Parquet schema.
    final ParquetRowReader rowReader = ParquetRowReader.builder()
            .sourceName(sourceFile)
            .binaryAsString(binaryAsString)
            .build();

    // The output file's Parquet schema is derived from the source file's schema
    // combined with the table schema.
    final ParquetSchema outputSchema = rowReader.getParquetSchema().deriveTargetSchema(schema);

    // Writer for the target file, using the derived output schema.
    final ParquetRowWriter rowWriter = ParquetRowWriter.builder()
            .targetName(targetFile)
            .parquetSchema(outputSchema)
            .build();

    // The value factory is built from the output schema's column-to-Parquet-data-type map.
    return RowMarshaller.<ParquetValue>builder()
            .settings(settings)
            .schema(schema)
            .tempDir(tempDir)
            .inputReader(rowReader)
            .valueFactory(new ParquetValueFactory(outputSchema.getColumnParquetDataTypeMap()))
            .outputWriter(rowWriter)
            .transformers(transforms)
            .build();
}