in src/main/java/com/google/cloud/solutions/autotokenize/pipeline/EncryptionPipeline.java [159:185]
/**
 * Builds the schema for the de-identified output records.
 *
 * <p>The input schema is parsed from {@code options.getSchema()} when that value is non-blank;
 * otherwise it is generated from the CSV headers via
 * {@code CsvRowFlatRecordConvertors.makeCsvAvroSchema}. The column names to tokenize come from
 * the DLP encrypt config when one is supplied, and from {@code options.getTokenizeColumns()}
 * otherwise.
 *
 * @return the schema produced by {@code DeIdentifiedRecordSchemaConverter} for the input schema
 *     and the selected column names.
 * @throws IllegalArgumentException (raised by {@code checkArgument}) when neither a non-blank
 *     schema nor non-empty headers for a {@code CSV_FILE} source are provided.
 */
private Schema buildEncryptedSchema() {
  var schemaJson = options.getSchema();
  var schemaProvided = isNotBlank(schemaJson);

  checkArgument(
      schemaProvided
          || (SourceType.CSV_FILE.equals(options.getSourceType())
              && options.getCsvHeaders() != null
              && !options.getCsvHeaders().isEmpty()),
      "Provide Source's Avro Schema or headers for CSV_FILE.");

  // Select with the same predicate used for validation: a blank (but non-null) schema string
  // must fall back to the CSV headers instead of being handed to the Avro parser.
  var inputSchema =
      schemaProvided
          ? new Schema.Parser().parse(schemaJson)
          : CsvRowFlatRecordConvertors.makeCsvAvroSchema(options.getCsvHeaders());

  List<String> tokenizeColumnNames =
      (options.getDlpEncryptConfigJson() == null)
          // Use provided tokenizeColumnNames
          ? options.getTokenizeColumns()
          // For DLP Tokenize use columnNames from config
          : DeidentifyColumns.columnNamesIn(
              JsonConvertor.parseJson(options.getDlpEncryptConfigJson(), DlpEncryptConfig.class));

  return DeIdentifiedRecordSchemaConverter.withOriginalSchema(inputSchema)
      .withEncryptColumnKeys(tokenizeColumnNames)
      .updatedSchema();
}