dlp/snippets/risk/k_anonymity_with_entity_id.go (111 lines of code) (raw):

// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package risk // [START dlp_k_anonymity_with_entity_id] import ( "context" "fmt" "io" "strings" "time" dlp "cloud.google.com/go/dlp/apiv2" "cloud.google.com/go/dlp/apiv2/dlppb" ) // Uses the Data Loss Prevention API to compute the k-anonymity of a // column set in a Google BigQuery table. func calculateKAnonymityWithEntityId(w io.Writer, projectID, datasetId, tableId string, columnNames ...string) error { // projectID := "your-project-id" // datasetId := "your-bigquery-dataset-id" // tableId := "your-bigquery-table-id" // columnNames := "age" "job_title" ctx := context.Background() // Initialize a client once and reuse it to send multiple requests. Clients // are safe to use across goroutines. When the client is no longer needed, // call the Close method to cleanup its resources. client, err := dlp.NewClient(ctx) if err != nil { return err } // Closing the client safely cleans up background resources. defer client.Close() // Specify the BigQuery table to analyze bigQueryTable := &dlppb.BigQueryTable{ ProjectId: "bigquery-public-data", DatasetId: "samples", TableId: "wikipedia", } // Configure the privacy metric for the job // Build the QuasiID slice. var q []*dlppb.FieldId for _, c := range columnNames { q = append(q, &dlppb.FieldId{Name: c}) } entityId := &dlppb.EntityId{ Field: &dlppb.FieldId{ Name: "id", }, } kAnonymityConfig := &dlppb.PrivacyMetric_KAnonymityConfig{ QuasiIds: q, EntityId: entityId, } privacyMetric := &dlppb.PrivacyMetric{ Type: &dlppb.PrivacyMetric_KAnonymityConfig_{ KAnonymityConfig: kAnonymityConfig, }, } // Specify the bigquery table to store the findings. // The "test_results" table in the given BigQuery dataset will be created if it doesn't // already exist. outputbigQueryTable := &dlppb.BigQueryTable{ ProjectId: projectID, DatasetId: datasetId, TableId: tableId, } // Create action to publish job status notifications to BigQuery table. outputStorageConfig := &dlppb.OutputStorageConfig{ Type: &dlppb.OutputStorageConfig_Table{ Table: outputbigQueryTable, }, } findings := &dlppb.Action_SaveFindings{ OutputConfig: outputStorageConfig, } action := &dlppb.Action{ Action: &dlppb.Action_SaveFindings_{ SaveFindings: findings, }, } // Configure the risk analysis job to perform riskAnalysisJobConfig := &dlppb.RiskAnalysisJobConfig{ PrivacyMetric: privacyMetric, SourceTable: bigQueryTable, Actions: []*dlppb.Action{ action, }, } // Build the request to be sent by the client req := &dlppb.CreateDlpJobRequest{ Parent: fmt.Sprintf("projects/%s/locations/global", projectID), Job: &dlppb.CreateDlpJobRequest_RiskJob{ RiskJob: riskAnalysisJobConfig, }, } // Send the request to the API using the client dlpJob, err := client.CreateDlpJob(ctx, req) if err != nil { return err } fmt.Fprintf(w, "Created job: %v\n", dlpJob.GetName()) // Build a request to get the completed job getDlpJobReq := &dlppb.GetDlpJobRequest{ Name: dlpJob.Name, } timeout := 15 * time.Minute startTime := time.Now() var completedJob *dlppb.DlpJob // Wait for job completion for time.Since(startTime) <= timeout { completedJob, err = client.GetDlpJob(ctx, getDlpJobReq) if err != nil { return err } if completedJob.GetState() == dlppb.DlpJob_DONE { break } time.Sleep(30 * time.Second) } if completedJob.GetState() != dlppb.DlpJob_DONE { fmt.Println("Job did not complete within 15 minutes.") } // Retrieve completed job status fmt.Fprintf(w, "Job status: %v", completedJob.State) fmt.Fprintf(w, "Job name: %v", dlpJob.Name) // Get the result and parse through and process the information kanonymityResult := completedJob.GetRiskDetails().GetKAnonymityResult() for _, result := range kanonymityResult.GetEquivalenceClassHistogramBuckets() { fmt.Fprintf(w, "Bucket size range: [%d, %d]\n", result.GetEquivalenceClassSizeLowerBound(), result.GetEquivalenceClassSizeLowerBound()) for _, bucket := range result.GetBucketValues() { quasiIdValues := []string{} for _, v := range bucket.GetQuasiIdsValues() { quasiIdValues = append(quasiIdValues, v.GetStringValue()) } fmt.Fprintf(w, "\tQuasi-ID values: %s", strings.Join(quasiIdValues, ",")) fmt.Fprintf(w, "\tClass size: %d", bucket.EquivalenceClassSize) } } return nil } // [END dlp_k_anonymity_with_entity_id]