in dlp/snippets/src/main/java/dlp/snippets/DeidentifyCloudStorage.java [78:194]
/**
 * Creates a DLP inspect job that de-identifies files under a Cloud Storage path, then polls the
 * job until it reaches a terminal state or the timeout elapses, and prints a summary.
 *
 * <p>The job inspects for {@code PERSON_NAME} and {@code EMAIL_ADDRESS} info types and requests
 * de-identification of image, CSV and text files. The polling timeout is taken from the
 * {@code TIMEOUT_MINUTES} constant declared elsewhere in this class.
 *
 * @param projectId project used for the job parent, the BigQuery table and the template names
 * @param gcsPath URL of the Cloud Storage file set to inspect
 * @param tableId BigQuery table that stores the transformation details
 * @param datasetId BigQuery dataset containing {@code tableId}
 * @param outputDirectory Cloud Storage location passed as the de-identify action's output
 * @param deidentifyTemplateId id of the de-identify template
 * @param structuredDeidentifyTemplateId id of the structured de-identify template
 * @param imageRedactTemplateId id of the image redaction template
 * @throws IOException declared by the original signature (client creation is the likely source)
 * @throws InterruptedException if the thread is interrupted while waiting between polls
 */
public static void deidentifyCloudStorage(
    String projectId,
    String gcsPath,
    String tableId,
    String datasetId,
    String outputDirectory,
    String deidentifyTemplateId,
    String structuredDeidentifyTemplateId,
    String imageRedactTemplateId)
    throws IOException, InterruptedException {
  try (DlpServiceClient dlp = DlpServiceClient.create()) {
    // Set path in Cloud Storage.
    CloudStorageOptions cloudStorageOptions =
        CloudStorageOptions.newBuilder()
            .setFileSet(CloudStorageOptions.FileSet.newBuilder().setUrl(gcsPath))
            .build();

    // Set storage config indicating the type of cloud storage.
    StorageConfig storageConfig =
        StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build();

    // Specify the type of info the inspection will look for.
    // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
    List<InfoType> infoTypes = new ArrayList<>();
    for (String typeName : new String[] {"PERSON_NAME", "EMAIL_ADDRESS"}) {
      infoTypes.add(InfoType.newBuilder().setName(typeName).build());
    }

    InspectConfig inspectConfig =
        InspectConfig.newBuilder().addAllInfoTypes(infoTypes).setIncludeQuote(true).build();

    // Types of files to include for de-identification. The enum constants are referenced
    // directly so a misspelled name is a compile error rather than a runtime exception.
    List<FileType> fileTypesToTransform =
        Arrays.asList(FileType.IMAGE, FileType.CSV, FileType.TEXT_FILE);

    // Specify the BigQuery table to store the transformation details.
    BigQueryTable table =
        BigQueryTable.newBuilder()
            .setProjectId(projectId)
            .setTableId(tableId)
            .setDatasetId(datasetId)
            .build();

    TransformationDetailsStorageConfig transformationDetailsStorageConfig =
        TransformationDetailsStorageConfig.newBuilder().setTable(table).build();

    // Specify the de-identify templates used for the transformation.
    TransformationConfig transformationConfig =
        TransformationConfig.newBuilder()
            .setDeidentifyTemplate(
                ProjectDeidentifyTemplateName.of(projectId, deidentifyTemplateId).toString())
            .setImageRedactTemplate(
                ProjectDeidentifyTemplateName.of(projectId, imageRedactTemplateId).toString())
            .setStructuredDeidentifyTemplate(
                ProjectDeidentifyTemplateName.of(projectId, structuredDeidentifyTemplateId)
                    .toString())
            .build();

    Action.Deidentify deidentify =
        Action.Deidentify.newBuilder()
            .setCloudStorageOutput(outputDirectory)
            .setTransformationConfig(transformationConfig)
            .setTransformationDetailsStorageConfig(transformationDetailsStorageConfig)
            .addAllFileTypesToTransform(fileTypesToTransform)
            .build();

    Action action = Action.newBuilder().setDeidentify(deidentify).build();

    // Configure the long-running job we want the service to perform.
    InspectJobConfig inspectJobConfig =
        InspectJobConfig.newBuilder()
            .setInspectConfig(inspectConfig)
            .setStorageConfig(storageConfig)
            .addActions(action)
            .build();

    // Construct the job creation request to be sent by the client.
    CreateDlpJobRequest createDlpJobRequest =
        CreateDlpJobRequest.newBuilder()
            .setParent(LocationName.of(projectId, "global").toString())
            .setInspectJob(inspectJobConfig)
            .build();

    // Send the job creation request.
    DlpJob response = dlp.createDlpJob(createDlpJobRequest);

    // Get the current time.
    long startTime = System.currentTimeMillis();

    // Poll until the job reaches a terminal state. FAILED and CANCELED are treated as terminal
    // alongside DONE; otherwise a job that can never complete would be polled until the timeout.
    while (true) {
      DlpJob.JobState state = response.getState();
      if (state == DlpJob.JobState.DONE
          || state == DlpJob.JobState.FAILED
          || state == DlpJob.JobState.CANCELED) {
        break;
      }

      // The timeout is evaluated only when another wait would be needed, so a job that
      // finishes on the last poll is not misreported as timed out.
      long elapsedTime = System.currentTimeMillis() - startTime;
      if (TimeUnit.MILLISECONDS.toMinutes(elapsedTime) >= TIMEOUT_MINUTES) {
        System.out.printf("Job did not complete within %d minutes.%n", TIMEOUT_MINUTES);
        break;
      }

      // Sleep for 30 seconds.
      Thread.sleep(30000);

      // Get the updated job status.
      response = dlp.getDlpJob(response.getName());
    }

    // Print the results.
    System.out.println("Job status: " + response.getState());
    System.out.println("Job name: " + response.getName());
    InspectDataSourceDetails.Result result = response.getInspectDetails().getResult();
    System.out.println("Findings: ");
    for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) {
      System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName());
      System.out.println("\tCount: " + infoTypeStat.getCount());
    }
  }
}