c3r-cli-spark/src/main/java/com/amazonaws/c3r/spark/io/schema/TemplateSchemaGenerator.java [34:219]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@Slf4j
public final class TemplateSchemaGenerator {

    /**
     * Every {@link ColumnType} value rendered as one bracketed, pipe-delimited list.
     * Used as the placeholder {@code type} value in template entries when cleartext columns are allowed.
     */
    private static final String ALL_COLUMN_TYPES = Arrays.stream(ColumnType.values())
            .map(ColumnType::toString)
            .collect(Collectors.joining("|", "[", "]"));

    /**
     * Every {@link ColumnType} value except {@link ColumnType#CLEARTEXT}, rendered as one bracketed,
     * pipe-delimited list. Used as the placeholder {@code type} value in template entries when
     * cleartext columns are not allowed.
     */
    private static final String ALL_COLUMN_TYPES_SANS_CLEARTEXT = Arrays.stream(ColumnType.values())
            .filter(columnType -> columnType != ColumnType.CLEARTEXT)
            .map(ColumnType::toString)
            .collect(Collectors.joining("|", "[", "]"));

    /**
     * Placeholder {@code pad} object added to each template entry whose source type is supported.
     * Its values are instructions for the person editing the template, not a usable pad configuration.
     */
    private static final JsonObject EXAMPLE_PAD;

    static {
        // Assemble the placeholder in a local first so the constant is assigned exactly once, fully populated.
        final JsonObject padTemplate = new JsonObject();
        padTemplate.addProperty("COMMENT", "omit this pad entry unless column type is sealed");
        padTemplate.addProperty("type", "[none|fixed|max]");
        padTemplate.addProperty("length", "omit length property for type none, otherwise specify value in [0, 10000]");
        EXAMPLE_PAD = padTemplate;
    }

    /**
     * Console output stream used for user-facing warnings.
     * When none is supplied to the constructor, an auto-flushing UTF-8 wrapper around {@code System.out} is used.
     */
    private final PrintStream consoleOutput;

    /**
     * Names of the columns in the input data, held as an unmodifiable copy of the caller's list.
     * {@code null} when no headers were supplied, in which case the template is generated positionally
     * from the column count and {@code headerRow} is written as {@code false}.
     */
    private final List<ColumnHeader> headers;

    /**
     * Number of source columns, taken from the size of {@link #sourceColumnTypes} at construction time.
     */
    private final int sourceColumnCount;

    /**
     * Source column types (in the order they appear in the input file).
     * A column whose type is {@code ClientDataType.UNKNOWN} is either forced to cleartext or skipped,
     * depending on {@link #allowCleartextColumns}.
     */
    private final List<ClientDataType> sourceColumnTypes;

    /**
     * Where to write the schema file.
     */
    private final String targetJsonFile;

    /**
     * Placeholder text written as the {@code type} of each editable template entry: all column types when
     * cleartext is allowed, otherwise all column types except cleartext.
     */
    private final String columnTypeOptions;

    /**
     * Whether this schema can have cleartext columns.
     * {@code true} when no client settings were provided or when the provided settings allow cleartext.
     */
    private final boolean allowCleartextColumns;

    /**
     * Initializes the template schema generator.
     *
     * <p>
     * Both input lists are snapshotted into unmodifiable copies, so later changes to the caller's lists cannot
     * desynchronize the stored headers, types and column count.
     *
     * @param sourceHeaders     List of column names in the input file, or {@code null} if the input has no header row
     * @param sourceColumnTypes Source column types (in the order they appear in the input file); must not be
     *                          {@code null} or contain {@code null} elements
     * @param targetJsonFile    Where to write the schema; must not be {@code null}
     * @param consoleOutput     Connection to output stream (i.e., output for user); if {@code null}, an
     *                          auto-flushing UTF-8 stream over {@code System.out} is used
     * @param clientSettings    Collaboration's client settings if provided, else {@code null}
     *                          (in which case cleartext columns are allowed)
     * @throws C3rIllegalArgumentException If input sizes are inconsistent
     */
    @Builder
    private TemplateSchemaGenerator(final List<ColumnHeader> sourceHeaders,
                                    @NonNull final List<ClientDataType> sourceColumnTypes,
                                    @NonNull final String targetJsonFile,
                                    final PrintStream consoleOutput,
                                    final ClientSettings clientSettings) {
        if (sourceHeaders != null && sourceHeaders.size() != sourceColumnTypes.size()) {
            throw new C3rIllegalArgumentException("Template schema generator given "
                    + sourceHeaders.size() + " headers and " + sourceColumnTypes.size() + " column data types.");
        }
        this.headers = sourceHeaders == null ? null : List.copyOf(sourceHeaders);
        // Copy the types just like the headers: the generator loops index this list up to sourceColumnCount,
        // so holding the caller's mutable list could lead to out-of-bounds reads if it shrinks later.
        this.sourceColumnTypes = List.copyOf(sourceColumnTypes);
        this.sourceColumnCount = this.sourceColumnTypes.size();
        this.targetJsonFile = targetJsonFile;
        this.consoleOutput = (consoleOutput == null) ? new PrintStream(System.out, true, StandardCharsets.UTF_8)
                : consoleOutput;
        // Without client settings there is no collaboration policy to honor, so cleartext stays available.
        this.allowCleartextColumns = clientSettings == null || clientSettings.isAllowCleartext();
        this.columnTypeOptions = allowCleartextColumns ? ALL_COLUMN_TYPES : ALL_COLUMN_TYPES_SANS_CLEARTEXT;
    }

    /**
     * Builds one template column schema per named source column. Requires {@code headers} to be non-{@code null}.
     *
     * <p>
     * Columns with a supported type get the full list of type options plus the example pad. Columns of
     * unsupported type are emitted as cleartext (with a warning) when cleartext is allowed, and are otherwise
     * left out of the result (also with a warning).
     *
     * @return The generated template column schemas
     */
    private JsonArray generateTemplateColumnSchemasFromSourceHeaders() {
        final JsonArray schemas = new JsonArray(headers.size());
        for (int col = 0; col < sourceColumnCount; col++) {
            final ColumnHeader columnHeader = headers.get(col);
            final boolean supportedType = sourceColumnTypes.get(col) != ClientDataType.UNKNOWN;

            // Nothing can be generated for an unsupported column when cleartext is off the table.
            if (!supportedType && !allowCleartextColumns) {
                consoleOutput.println(SchemaGeneratorUtils.unsupportedTypeSkippingColumnWarning(columnHeader, col));
                continue;
            }

            final String headerName = columnHeader.toString();
            final JsonObject columnSchema = new JsonObject();
            columnSchema.addProperty("sourceHeader", headerName);
            columnSchema.addProperty("targetHeader", headerName);
            if (supportedType) {
                columnSchema.addProperty("type", columnTypeOptions);
                columnSchema.add("pad", EXAMPLE_PAD);
            } else {
                consoleOutput.println(SchemaGeneratorUtils.unsupportedTypeWarning(columnHeader, col));
                columnSchema.addProperty("type", ColumnType.CLEARTEXT.toString());
            }
            schemas.add(columnSchema);
        }
        return schemas;
    }

    /**
     * Creates template column schemas for headerless source.
     *
     * <p>
     * The result holds one inner array per source column, in positional order. Each inner array holds a single
     * template entry, or is empty when the column has an unsupported type and cleartext columns are not allowed.
     * A column of unsupported type is emitted as cleartext when cleartext is allowed. In both unsupported-type
     * cases a warning is printed to the console, matching the behavior for sources with headers.
     *
     * @return The generated template column schemas
     */
    private JsonArray generateTemplateColumnSchemasFromColumnCount() {
        final var columnSchemaArray = new JsonArray(sourceColumnCount);
        for (int i = 0; i < sourceColumnCount; i++) {
            // Headerless input has no names, so the positional header doubles as the target header.
            final var positionalHeader = ColumnHeader.of(i);
            // Array template entry will go in
            final var entryArray = new JsonArray(1);
            // template entry
            final var templateEntry = new JsonObject();
            templateEntry.addProperty("targetHeader", positionalHeader.toString());
            if (sourceColumnTypes.get(i) != ClientDataType.UNKNOWN) {
                templateEntry.addProperty("type", columnTypeOptions);
                templateEntry.add("pad", EXAMPLE_PAD);
                entryArray.add(templateEntry);
            } else if (allowCleartextColumns) {
                // Tell the user why this column is pinned to cleartext instead of offering all type options.
                // The positional header is passed so the message names the same target header as the template.
                consoleOutput.println(SchemaGeneratorUtils.unsupportedTypeWarning(positionalHeader, i));
                templateEntry.addProperty("type", ColumnType.CLEARTEXT.toString());
                entryArray.add(templateEntry);
            } else {
                // If the column type does not support cryptographic computing and cleartext columns are not allowed,
                // then we do not add a template entry to the array, and we warn the user this column has been skipped.
                consoleOutput.println(SchemaGeneratorUtils.unsupportedTypeSkippingColumnWarning(null, i));
            }
            // Added even when empty so that the outer array keeps one slot per source column position.
            columnSchemaArray.add(entryArray);
        }
        return columnSchemaArray;
    }

    /**
     * Generate a template schema. I.e., the type (see {@link ColumnType}) and padding
     * (see {@link PadType}) are left with all possible options and must be manually edited.
     *
     * @throws C3rRuntimeException If unable to write to the target file
     */
    public void run() {
        final var schemaContent = new JsonObject();
        if (headers != null) {
            schemaContent.addProperty("headerRow", true);
            schemaContent.add("columns", generateTemplateColumnSchemasFromSourceHeaders());
        } else {
            schemaContent.addProperty("headerRow", false);
            schemaContent.add("columns", generateTemplateColumnSchemasFromColumnCount());
        }

        try (BufferedWriter writer = Files.newBufferedWriter(Path.of(targetJsonFile), StandardCharsets.UTF_8)) {
            writer.write(GsonUtil.toJson(schemaContent));
        } catch (IOException e) {
            throw new C3rRuntimeException("Could not write to target schema file.", e);
        }
        log.info("Template schema written to {}.", targetJsonFile);
        log.info("Schema requires manual modification before use:");
        log.info("  * Types for each column must be selected.");
        log.info("  * Pad entry must be modified for each sealed column and removed for other column types.");
        log.info("Resulting schema must be valid JSON (e.g., final entries in objects have no trailing comma, etc).");
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



c3r-cli/src/main/java/com/amazonaws/c3r/io/schema/TemplateSchemaGenerator.java [33:218]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@Slf4j
public final class TemplateSchemaGenerator {

    /**
     * Every {@link ColumnType} value rendered as one bracketed, pipe-delimited list.
     * Used as the placeholder {@code type} value in template entries when cleartext columns are allowed.
     */
    private static final String ALL_COLUMN_TYPES = Arrays.stream(ColumnType.values())
            .map(ColumnType::toString)
            .collect(Collectors.joining("|", "[", "]"));

    /**
     * Every {@link ColumnType} value except {@link ColumnType#CLEARTEXT}, rendered as one bracketed,
     * pipe-delimited list. Used as the placeholder {@code type} value in template entries when
     * cleartext columns are not allowed.
     */
    private static final String ALL_COLUMN_TYPES_SANS_CLEARTEXT = Arrays.stream(ColumnType.values())
            .filter(columnType -> columnType != ColumnType.CLEARTEXT)
            .map(ColumnType::toString)
            .collect(Collectors.joining("|", "[", "]"));

    /**
     * Placeholder {@code pad} object added to each template entry whose source type is supported.
     * Its values are instructions for the person editing the template, not a usable pad configuration.
     */
    private static final JsonObject EXAMPLE_PAD;

    static {
        // Assemble the placeholder in a local first so the constant is assigned exactly once, fully populated.
        final JsonObject padTemplate = new JsonObject();
        padTemplate.addProperty("COMMENT", "omit this pad entry unless column type is sealed");
        padTemplate.addProperty("type", "[none|fixed|max]");
        padTemplate.addProperty("length", "omit length property for type none, otherwise specify value in [0, 10000]");
        EXAMPLE_PAD = padTemplate;
    }

    /**
     * Console output stream used for user-facing warnings.
     * When none is supplied to the constructor, an auto-flushing UTF-8 wrapper around {@code System.out} is used.
     */
    private final PrintStream consoleOutput;

    /**
     * Names of the columns in the input data, held as an unmodifiable copy of the caller's list.
     * {@code null} when no headers were supplied, in which case the template is generated positionally
     * from the column count and {@code headerRow} is written as {@code false}.
     */
    private final List<ColumnHeader> headers;

    /**
     * Number of source columns, taken from the size of {@link #sourceColumnTypes} at construction time.
     */
    private final int sourceColumnCount;

    /**
     * Source column types (in the order they appear in the input file).
     * A column whose type is {@code ClientDataType.UNKNOWN} is either forced to cleartext or skipped,
     * depending on {@link #allowCleartextColumns}.
     */
    private final List<ClientDataType> sourceColumnTypes;

    /**
     * Where to write the schema file.
     */
    private final String targetJsonFile;

    /**
     * Placeholder text written as the {@code type} of each editable template entry: all column types when
     * cleartext is allowed, otherwise all column types except cleartext.
     */
    private final String columnTypeOptions;

    /**
     * Whether this schema can have cleartext columns.
     * {@code true} when no client settings were provided or when the provided settings allow cleartext.
     */
    private final boolean allowCleartextColumns;

    /**
     * Initializes the template schema generator.
     *
     * <p>
     * Both input lists are snapshotted into unmodifiable copies, so later changes to the caller's lists cannot
     * desynchronize the stored headers, types and column count.
     *
     * @param sourceHeaders     List of column names in the input file, or {@code null} if the input has no header row
     * @param sourceColumnTypes Source column types (in the order they appear in the input file); must not be
     *                          {@code null} or contain {@code null} elements
     * @param targetJsonFile    Where to write the schema; must not be {@code null}
     * @param consoleOutput     Connection to output stream (i.e., output for user); if {@code null}, an
     *                          auto-flushing UTF-8 stream over {@code System.out} is used
     * @param clientSettings    Collaboration's client settings if provided, else {@code null}
     *                          (in which case cleartext columns are allowed)
     * @throws C3rIllegalArgumentException If input sizes are inconsistent
     */
    @Builder
    private TemplateSchemaGenerator(final List<ColumnHeader> sourceHeaders,
                                    @NonNull final List<ClientDataType> sourceColumnTypes,
                                    @NonNull final String targetJsonFile,
                                    final PrintStream consoleOutput,
                                    final ClientSettings clientSettings) {
        if (sourceHeaders != null && sourceHeaders.size() != sourceColumnTypes.size()) {
            throw new C3rIllegalArgumentException("Template schema generator given "
                    + sourceHeaders.size() + " headers and " + sourceColumnTypes.size() + " column data types.");
        }
        this.headers = sourceHeaders == null ? null : List.copyOf(sourceHeaders);
        // Copy the types just like the headers: the generator loops index this list up to sourceColumnCount,
        // so holding the caller's mutable list could lead to out-of-bounds reads if it shrinks later.
        this.sourceColumnTypes = List.copyOf(sourceColumnTypes);
        this.sourceColumnCount = this.sourceColumnTypes.size();
        this.targetJsonFile = targetJsonFile;
        this.consoleOutput = (consoleOutput == null) ? new PrintStream(System.out, true, StandardCharsets.UTF_8)
                : consoleOutput;
        // Without client settings there is no collaboration policy to honor, so cleartext stays available.
        this.allowCleartextColumns = clientSettings == null || clientSettings.isAllowCleartext();
        this.columnTypeOptions = allowCleartextColumns ? ALL_COLUMN_TYPES : ALL_COLUMN_TYPES_SANS_CLEARTEXT;
    }

    /**
     * Builds one template column schema per named source column. Requires {@code headers} to be non-{@code null}.
     *
     * <p>
     * Columns with a supported type get the full list of type options plus the example pad. Columns of
     * unsupported type are emitted as cleartext (with a warning) when cleartext is allowed, and are otherwise
     * left out of the result (also with a warning).
     *
     * @return The generated template column schemas
     */
    private JsonArray generateTemplateColumnSchemasFromSourceHeaders() {
        final JsonArray schemas = new JsonArray(headers.size());
        for (int col = 0; col < sourceColumnCount; col++) {
            final ColumnHeader columnHeader = headers.get(col);
            final boolean supportedType = sourceColumnTypes.get(col) != ClientDataType.UNKNOWN;

            // Nothing can be generated for an unsupported column when cleartext is off the table.
            if (!supportedType && !allowCleartextColumns) {
                consoleOutput.println(SchemaGeneratorUtils.unsupportedTypeSkippingColumnWarning(columnHeader, col));
                continue;
            }

            final String headerName = columnHeader.toString();
            final JsonObject columnSchema = new JsonObject();
            columnSchema.addProperty("sourceHeader", headerName);
            columnSchema.addProperty("targetHeader", headerName);
            if (supportedType) {
                columnSchema.addProperty("type", columnTypeOptions);
                columnSchema.add("pad", EXAMPLE_PAD);
            } else {
                consoleOutput.println(SchemaGeneratorUtils.unsupportedTypeWarning(columnHeader, col));
                columnSchema.addProperty("type", ColumnType.CLEARTEXT.toString());
            }
            schemas.add(columnSchema);
        }
        return schemas;
    }

    /**
     * Creates template column schemas for headerless source.
     *
     * <p>
     * The result holds one inner array per source column, in positional order. Each inner array holds a single
     * template entry, or is empty when the column has an unsupported type and cleartext columns are not allowed.
     * A column of unsupported type is emitted as cleartext when cleartext is allowed. In both unsupported-type
     * cases a warning is printed to the console, matching the behavior for sources with headers.
     *
     * @return The generated template column schemas
     */
    private JsonArray generateTemplateColumnSchemasFromColumnCount() {
        final var columnSchemaArray = new JsonArray(sourceColumnCount);
        for (int i = 0; i < sourceColumnCount; i++) {
            // Headerless input has no names, so the positional header doubles as the target header.
            final var positionalHeader = ColumnHeader.of(i);
            // Array template entry will go in
            final var entryArray = new JsonArray(1);
            // template entry
            final var templateEntry = new JsonObject();
            templateEntry.addProperty("targetHeader", positionalHeader.toString());
            if (sourceColumnTypes.get(i) != ClientDataType.UNKNOWN) {
                templateEntry.addProperty("type", columnTypeOptions);
                templateEntry.add("pad", EXAMPLE_PAD);
                entryArray.add(templateEntry);
            } else if (allowCleartextColumns) {
                // Tell the user why this column is pinned to cleartext instead of offering all type options.
                // The positional header is passed so the message names the same target header as the template.
                consoleOutput.println(SchemaGeneratorUtils.unsupportedTypeWarning(positionalHeader, i));
                templateEntry.addProperty("type", ColumnType.CLEARTEXT.toString());
                entryArray.add(templateEntry);
            } else {
                // If the column type does not support cryptographic computing and cleartext columns are not allowed,
                // then we do not add a template entry to the array, and we warn the user this column has been skipped.
                consoleOutput.println(SchemaGeneratorUtils.unsupportedTypeSkippingColumnWarning(null, i));
            }
            // Added even when empty so that the outer array keeps one slot per source column position.
            columnSchemaArray.add(entryArray);
        }
        return columnSchemaArray;
    }

    /**
     * Generate a template schema. I.e., the type (see {@link com.amazonaws.c3r.config.ColumnType}) and padding
     * (see {@link com.amazonaws.c3r.config.PadType}) are left with all possible options and must be manually edited.
     *
     * @throws C3rRuntimeException If unable to write to the target file
     */
    public void run() {
        final var schemaContent = new JsonObject();
        if (headers != null) {
            schemaContent.addProperty("headerRow", true);
            schemaContent.add("columns", generateTemplateColumnSchemasFromSourceHeaders());
        } else {
            schemaContent.addProperty("headerRow", false);
            schemaContent.add("columns", generateTemplateColumnSchemasFromColumnCount());
        }

        try (BufferedWriter writer = Files.newBufferedWriter(Path.of(targetJsonFile), StandardCharsets.UTF_8)) {
            writer.write(GsonUtil.toJson(schemaContent));
        } catch (IOException e) {
            throw new C3rRuntimeException("Could not write to target schema file.", e);
        }
        log.info("Template schema written to {}.", targetJsonFile);
        log.info("Schema requires manual modification before use:");
        log.info("  * Types for each column must be selected.");
        log.info("  * Pad entry must be modified for each sealed column and removed for other column types.");
        log.info("Resulting schema must be valid JSON (e.g., final entries in objects have no trailing comma, etc).");
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



