in c3r-cli-spark/src/main/java/com/amazonaws/c3r/spark/cli/SchemaMode.java [248:292]
public Integer call() {
    // Entry point for schema generation: validates the arguments, resolves the
    // output path and file format, then runs the generator matching that format.
    // Returns Main.SUCCESS on completion; any exception is passed to
    // Main.handleException and Main.FAILURE is returned instead.
    try {
        validate();

        final String inputPath = requiredArgs.getInput();

        // When no output was given, fall back to the input's bare file name plus ".json".
        final String defaultTarget = new File(inputPath).getName() + ".json";
        final String targetPath = Objects.requireNonNullElse(optionalArgs.output, defaultTarget);

        // An explicitly supplied format takes precedence; otherwise ask
        // FileFormat.fromFileName to resolve one from the input path.
        final FileFormat format = Optional.ofNullable(optionalArgs.fileFormat)
                .orElseGet(() -> FileFormat.fromFileName(inputPath));
        if (format == null) {
            throw new C3rIllegalArgumentException(
                    "Unknown file format (consider using the --format flag): " + inputPath);
        }

        if (format == FileFormat.CSV) {
            CsvSchemaGenerator.builder()
                    .inputCsvFile(inputPath)
                    .hasHeaders(optionalArgs.hasHeaders)
                    .targetJsonFile(targetPath)
                    .overwrite(optionalArgs.overwrite)
                    .clientSettings(getClientSettings())
                    .build()
                    .generateSchema(subMode);
        } else if (format == FileFormat.PARQUET) {
            // The header flag is rejected for Parquet input before any generator is built.
            if (!optionalArgs.hasHeaders) {
                throw new C3rIllegalArgumentException("--noHeaders is not applicable for Parquet files.");
            }
            ParquetSchemaGenerator.builder()
                    .inputParquetFile(inputPath)
                    .targetJsonFile(targetPath)
                    .overwrite(optionalArgs.overwrite)
                    .clientSettings(getClientSettings())
                    .sparkSession(sparkSession)
                    .build()
                    .generateSchema(subMode);
        } else {
            throw new C3rIllegalArgumentException("Unsupported file format for schema generation: " + format);
        }

        return Main.SUCCESS;
    } catch (Exception e) {
        // Command boundary: every failure is reported through Main and mapped to the failure code.
        Main.handleException(e, optionalArgs.enableStackTraces);
        return Main.FAILURE;
    }
}