dlp/kMapEstimationAnalysis.js (110 lines of code) (raw):

// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // sample-metadata: // title: kMap Estimation Analysis // description: Computes the k-map risk estimation of a column set in a Google BigQuery table. // usage: node kMapEstimationAnalysis.js my-project tableProjectId datasetId tableId topicId subscriptionId regionCode quasiIds async function main( projectId, tableProjectId, datasetId, tableId, topicId, subscriptionId, regionCode, quasiIds, infoTypes ) { quasiIds = transformCLI(quasiIds, infoTypes); // [START dlp_k_map] // Import the Google Cloud client libraries const DLP = require('@google-cloud/dlp'); const {PubSub} = require('@google-cloud/pubsub'); // Instantiates clients const dlp = new DLP.DlpServiceClient(); const pubsub = new PubSub(); // The project ID to run the API call under // const projectId = 'my-project'; // The project ID the table is stored under // This may or (for public datasets) may not equal the calling project ID // const tableProjectId = 'my-project'; // The ID of the dataset to inspect, e.g. 'my_dataset' // const datasetId = 'my_dataset'; // The ID of the table to inspect, e.g. 'my_table' // const tableId = 'my_table'; // The name of the Pub/Sub topic to notify once the job completes // TODO(developer): create a Pub/Sub topic to use for this // const topicId = 'MY-PUBSUB-TOPIC' // The name of the Pub/Sub subscription to use when listening for job // completion notifications // TODO(developer): create a Pub/Sub subscription to use for this // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' // The ISO 3166-1 region code that the data is representative of // Can be omitted if using a region-specific infoType (such as US_ZIP_5) // const regionCode = 'USA'; // A set of columns that form a composite key ('quasi-identifiers'), and // optionally their reidentification distributions // const quasiIds = [{ field: { name: 'age' }, infoType: { name: 'AGE' }}]; async function kMapEstimationAnalysis() { const sourceTable = { projectId: tableProjectId, datasetId: datasetId, tableId: tableId, }; // Construct request for creating a risk analysis job const request = { parent: `projects/${projectId}/locations/global`, riskJob: { privacyMetric: { kMapEstimationConfig: { quasiIds: quasiIds, regionCode: regionCode, }, }, sourceTable: sourceTable, actions: [ { pubSub: { topic: `projects/${projectId}/topics/${topicId}`, }, }, ], }, }; // Create helper function for unpacking values const getValue = obj => obj[Object.keys(obj)[0]]; // Run risk analysis job const [topicResponse] = await pubsub.topic(topicId).get(); const subscription = await topicResponse.subscription(subscriptionId); const [jobsResponse] = await dlp.createDlpJob(request); const jobName = jobsResponse.name; console.log(`Job created. Job name: ${jobName}`); // Watch the Pub/Sub topic until the DLP job finishes await new Promise((resolve, reject) => { const messageHandler = message => { if (message.attributes && message.attributes.DlpJobName === jobName) { message.ack(); subscription.removeListener('message', messageHandler); subscription.removeListener('error', errorHandler); resolve(jobName); } else { message.nack(); } }; const errorHandler = err => { subscription.removeListener('message', messageHandler); subscription.removeListener('error', errorHandler); reject(err); }; subscription.on('message', messageHandler); subscription.on('error', errorHandler); }); setTimeout(() => { console.log(' Waiting for DLP job to fully complete'); }, 500); const [job] = await dlp.getDlpJob({name: jobName}); const histogramBuckets = job.riskDetails.kMapEstimationResult.kMapEstimationHistogram; histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => { console.log(`Bucket ${histogramBucketIdx}:`); console.log( ` Anonymity range: [${histogramBucket.minAnonymity}, ${histogramBucket.maxAnonymity}]` ); console.log(` Size: ${histogramBucket.bucketSize}`); histogramBucket.bucketValues.forEach(valueBucket => { const values = valueBucket.quasiIdsValues.map(value => getValue(value)); console.log(` Values: ${values.join(' ')}`); console.log( ` Estimated k-map anonymity: ${valueBucket.estimatedAnonymity}` ); }); }); } await kMapEstimationAnalysis(); // [END dlp_k_map] } // TODO(developer): Please uncomment below line before running sample // main(...process.argv.slice(2)); process.on('unhandledRejection', err => { console.error(err.message); process.exitCode = 1; }); function transformCLI(quasiIds, infoTypes) { infoTypes = infoTypes ? infoTypes.split(',') : null; quasiIds = quasiIds ? quasiIds.split(',').map((name, index) => { return { field: { name: name, }, infoType: { name: infoTypes[index], }, }; }) : undefined; return quasiIds; } module.exports = main;