public static void deidentifyCloudStorage()

in dlp/snippets/src/main/java/dlp/snippets/DeidentifyCloudStorage.java [78:194]


  public static void deidentifyCloudStorage(
      String projectId,
      String gcsPath,
      String tableId,
      String datasetId,
      String outputDirectory,
      String deidentifyTemplateId,
      String structuredDeidentifyTemplateId,
      String imageRedactTemplateId)
      throws IOException, InterruptedException {

    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Set path in Cloud Storage.
      CloudStorageOptions cloudStorageOptions =
          CloudStorageOptions.newBuilder()
              .setFileSet(CloudStorageOptions.FileSet.newBuilder().setUrl(gcsPath))
              .build();

      // Set storage config indicating the type of cloud storage.
      StorageConfig storageConfig =
          StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build();

      // Specify the type of info the inspection will look for.
      // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
      List<InfoType> infoTypes = new ArrayList<>();
      for (String typeName : new String[] {"PERSON_NAME", "EMAIL_ADDRESS"}) {
        infoTypes.add(InfoType.newBuilder().setName(typeName).build());
      }

      InspectConfig inspectConfig =
          InspectConfig.newBuilder().addAllInfoTypes(infoTypes).setIncludeQuote(true).build();

      // Types of files to include for de-identification.
      List<FileType> fileTypesToTransform =
          Arrays.asList(
              FileType.valueOf("IMAGE"), FileType.valueOf("CSV"), FileType.valueOf("TEXT_FILE"));

      // Specify the big query table to store the transformation details.
      BigQueryTable table =
          BigQueryTable.newBuilder()
              .setProjectId(projectId)
              .setTableId(tableId)
              .setDatasetId(datasetId)
              .build();

      TransformationDetailsStorageConfig transformationDetailsStorageConfig =
          TransformationDetailsStorageConfig.newBuilder().setTable(table).build();

      // Specify the de-identify template used for the transformation.
      TransformationConfig transformationConfig =
          TransformationConfig.newBuilder()
              .setDeidentifyTemplate(
                  ProjectDeidentifyTemplateName.of(projectId, deidentifyTemplateId).toString())
              .setImageRedactTemplate(
                  ProjectDeidentifyTemplateName.of(projectId, imageRedactTemplateId).toString())
              .setStructuredDeidentifyTemplate(
                  ProjectDeidentifyTemplateName.of(projectId, structuredDeidentifyTemplateId)
                      .toString())
              .build();

      Action.Deidentify deidentify =
          Action.Deidentify.newBuilder()
              .setCloudStorageOutput(outputDirectory)
              .setTransformationConfig(transformationConfig)
              .setTransformationDetailsStorageConfig(transformationDetailsStorageConfig)
              .addAllFileTypesToTransform(fileTypesToTransform)
              .build();

      Action action = Action.newBuilder().setDeidentify(deidentify).build();

      // Configure the long-running job we want the service to perform.
      InspectJobConfig inspectJobConfig =
          InspectJobConfig.newBuilder()
              .setInspectConfig(inspectConfig)
              .setStorageConfig(storageConfig)
              .addActions(action)
              .build();

      // Construct the job creation request to be sent by the client.
      CreateDlpJobRequest createDlpJobRequest =
          CreateDlpJobRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setInspectJob(inspectJobConfig)
              .build();

      // Send the job creation request.
      DlpJob response = dlp.createDlpJob(createDlpJobRequest);

      // Get the current time.
      long startTime = System.currentTimeMillis();

      // Check if the job state is DONE.
      while (response.getState() != DlpJob.JobState.DONE) {
        // Sleep for 30 second.
        Thread.sleep(30000);

        // Get the updated job status.
        response = dlp.getDlpJob(response.getName());

        // Check if the timeout duration has exceeded.
        long elapsedTime = System.currentTimeMillis() - startTime;
        if (TimeUnit.MILLISECONDS.toMinutes(elapsedTime) >= TIMEOUT_MINUTES) {
          System.out.printf("Job did not complete within %d minutes.%n", TIMEOUT_MINUTES);
          break;
        }
      }
      // Print the results.
      System.out.println("Job status: " + response.getState());
      System.out.println("Job name: " + response.getName());
      InspectDataSourceDetails.Result result = response.getInspectDetails().getResult();
      System.out.println("Findings: ");
      for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) {
        System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName());
        System.out.println("\tCount: " + infoTypeStat.getCount());
      }
    }
  }