public static void calculateLDiversity()

in dlp/snippets/src/main/java/dlp/snippets/RiskAnalysisLDiversity.java [68:195]


  public static void calculateLDiversity(
      String projectId, String datasetId, String tableId, String topicId, String subscriptionId)
      throws ExecutionException, InterruptedException, IOException {

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    DlpServiceSettings.Builder dlpServiceSettingsBuilder = DlpServiceSettings.newBuilder();
    dlpServiceSettingsBuilder
        .getDlpJobSettings()
        .setRetrySettings(
            dlpServiceSettingsBuilder
                .getDlpJobSettings()
                .getRetrySettings()
                .toBuilder()
                .setTotalTimeout(Duration.ofSeconds(600))
                .build());
    try (DlpServiceClient dlpServiceClient =
        DlpServiceClient.create(dlpServiceSettingsBuilder.build())) {
      // Specify the BigQuery table to analyze
      BigQueryTable bigQueryTable =
          BigQueryTable.newBuilder()
              .setProjectId(projectId)
              .setDatasetId(datasetId)
              .setTableId(tableId)
              .build();

      // These values represent the column names of quasi-identifiers to analyze
      List<String> quasiIds = Arrays.asList("Age", "Mystery");

      // This value represents the column name to compare the quasi-identifiers against
      String sensitiveAttribute = "Name";

      // Configure the privacy metric for the job
      FieldId sensitiveAttributeField = FieldId.newBuilder().setName(sensitiveAttribute).build();
      List<FieldId> quasiIdFields =
          quasiIds.stream()
              .map(columnName -> FieldId.newBuilder().setName(columnName).build())
              .collect(Collectors.toList());
      LDiversityConfig ldiversityConfig =
          LDiversityConfig.newBuilder()
              .addAllQuasiIds(quasiIdFields)
              .setSensitiveAttribute(sensitiveAttributeField)
              .build();
      PrivacyMetric privacyMetric =
          PrivacyMetric.newBuilder().setLDiversityConfig(ldiversityConfig).build();

      // Create action to publish job status notifications over Google Cloud Pub/
      ProjectTopicName topicName = ProjectTopicName.of(projectId, topicId);
      PublishToPubSub publishToPubSub =
          PublishToPubSub.newBuilder().setTopic(topicName.toString()).build();
      Action action = Action.newBuilder().setPubSub(publishToPubSub).build();

      // Configure the risk analysis job to perform
      RiskAnalysisJobConfig riskAnalysisJobConfig =
          RiskAnalysisJobConfig.newBuilder()
              .setSourceTable(bigQueryTable)
              .setPrivacyMetric(privacyMetric)
              .addActions(action)
              .build();

      // Build the request to be sent by the client
      CreateDlpJobRequest createDlpJobRequest =
          CreateDlpJobRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setRiskJob(riskAnalysisJobConfig)
              .build();

      // Send the request to the API using the client
      DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);

      // Set up a Pub/Sub subscriber to listen on the job completion status
      final SettableApiFuture<Boolean> done = SettableApiFuture.create();

      ProjectSubscriptionName subscriptionName =
          ProjectSubscriptionName.of(projectId, subscriptionId);

      MessageReceiver messageHandler =
          (PubsubMessage pubsubMessage, AckReplyConsumer ackReplyConsumer) -> {
            handleMessage(dlpJob, done, pubsubMessage, ackReplyConsumer);
          };
      Subscriber subscriber = Subscriber.newBuilder(subscriptionName, messageHandler).build();
      subscriber.startAsync();

      // Wait for job completion semi-synchronously
      // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
      try {
        done.get(15, TimeUnit.MINUTES);
      } catch (TimeoutException e) {
        System.out.println("Job was not completed after 15 minutes.");
        return;
      } finally {
        subscriber.stopAsync();
        subscriber.awaitTerminated();
      }

      // Build a request to get the completed job
      GetDlpJobRequest getDlpJobRequest =
          GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build();

      // Retrieve completed job status
      DlpJob completedJob = dlpServiceClient.getDlpJob(getDlpJobRequest);
      System.out.println("Job status: " + completedJob.getState());
      System.out.println("Job name: " + dlpJob.getName());

      // Get the result and parse through and process the information
      LDiversityResult ldiversityResult = completedJob.getRiskDetails().getLDiversityResult();
      List<LDiversityHistogramBucket> histogramBucketList =
          ldiversityResult.getSensitiveValueFrequencyHistogramBucketsList();
      for (LDiversityHistogramBucket result : histogramBucketList) {
        for (LDiversityEquivalenceClass bucket : result.getBucketValuesList()) {
          List<String> quasiIdValues =
              bucket.getQuasiIdsValuesList().stream()
                  .map(Value::toString)
                  .collect(Collectors.toList());

          System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues));
          System.out.println("\tClass size: " + bucket.getEquivalenceClassSize());

          for (ValueFrequency valueFrequency : bucket.getTopSensitiveValuesList()) {
            System.out.printf(
                "\t\tSensitive value %s occurs %d time(s).\n",
                valueFrequency.getValue().toString(), valueFrequency.getCount());
          }
        }
      }
    }
  }