dlp/inspectBigQueryTableWithSampling.js (95 lines of code) (raw):

// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // sample-metadata: // title: Inspect Bigquery with sampling // description: Inspects a BigQuery table using the Data Loss Prevention API to scan a 1000-row subset of a BigQuery table. // usage: node inspectBigQueryTableWithSampling.js my-project dataProjectId datasetId tableId topicId subscriptionId async function main( projectId, dataProjectId, datasetId, tableId, topicId, subscriptionId ) { // [START dlp_inspect_bigquery_with_sampling] // Import the Google Cloud client libraries const DLP = require('@google-cloud/dlp'); const {PubSub} = require('@google-cloud/pubsub'); // Instantiates clients const dlp = new DLP.DlpServiceClient(); const pubsub = new PubSub(); // The project ID to run the API call under // const projectId = 'my-project'; // The project ID the table is stored under // This may or (for public datasets) may not equal the calling project ID // const dataProjectId = 'my-project'; // The ID of the dataset to inspect, e.g. 'my_dataset' // const datasetId = 'my_dataset'; // The ID of the table to inspect, e.g. 'my_table' // const tableId = 'my_table'; // The name of the Pub/Sub topic to notify once the job completes // TODO(developer): create a Pub/Sub topic to use for this // const topicId = 'MY-PUBSUB-TOPIC' // The name of the Pub/Sub subscription to use when listening for job // completion notifications // TODO(developer): create a Pub/Sub subscription to use for this // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' // DLP Job max time (in milliseconds) const DLP_JOB_WAIT_TIME = 15 * 1000 * 60; async function inspectBigqueryWithSampling() { // Specify the type of info the inspection will look for. // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types const infoTypes = [{name: 'PERSON_NAME'}]; // Specify the BigQuery options required for inspection. const storageItem = { bigQueryOptions: { tableReference: { projectId: dataProjectId, datasetId: datasetId, tableId: tableId, }, rowsLimit: 1000, sampleMethod: DLP.protos.google.privacy.dlp.v2.BigQueryOptions.SampleMethod .RANDOM_START, includedFields: [{name: 'name'}], }, }; // Specify the action that is triggered when the job completes. const actions = [ { pubSub: { topic: `projects/${projectId}/topics/${topicId}`, }, }, ]; // Construct request for creating an inspect job const request = { parent: `projects/${projectId}/locations/global`, inspectJob: { inspectConfig: { infoTypes: infoTypes, includeQuote: true, }, storageConfig: storageItem, actions: actions, }, }; // Use the client to send the request. const [topicResponse] = await pubsub.topic(topicId).get(); // Verify the Pub/Sub topic and listen for job notifications via an // existing subscription. const subscription = await topicResponse.subscription(subscriptionId); const [jobsResponse] = await dlp.createDlpJob(request); const jobName = jobsResponse.name; // Watch the Pub/Sub topic until the DLP job finishes await new Promise((resolve, reject) => { // Set up the timeout const timer = setTimeout(() => { reject(new Error('Timeout')); }, DLP_JOB_WAIT_TIME); const messageHandler = message => { if (message.attributes && message.attributes.DlpJobName === jobName) { message.ack(); subscription.removeListener('message', messageHandler); subscription.removeListener('error', errorHandler); clearTimeout(timer); resolve(jobName); } else { message.nack(); } }; const errorHandler = err => { subscription.removeListener('message', messageHandler); subscription.removeListener('error', errorHandler); clearTimeout(timer); reject(err); }; subscription.on('message', messageHandler); subscription.on('error', errorHandler); }); const [job] = await dlp.getDlpJob({name: jobName}); console.log(`Job ${job.name} status: ${job.state}`); const infoTypeStats = job.inspectDetails.result.infoTypeStats; if (infoTypeStats.length > 0) { infoTypeStats.forEach(infoTypeStat => { console.log( ` Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.` ); }); } else { console.log('No findings.'); } } await inspectBigqueryWithSampling(); // [END dlp_inspect_bigquery_with_sampling] } process.on('unhandledRejection', err => { console.error(err.message); process.exitCode = 1; }); // TODO(developer): Please uncomment below line before running sample // main(...process.argv.slice(2)); module.exports = main;