dlp/inspectGCSFile.js (112 lines of code) (raw):
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// sample-metadata:
// title: Inspect GCS File
// description: Inspects a text file stored on Google Cloud Storage with the Data Loss Prevention API, using Pub/Sub for job notifications.
// usage: node inspectGCSFile.js my-project filepath minLikelihood maxFindings infoTypes customInfoTypes includeQuote
async function main(
projectId,
bucketName,
fileName,
topicId,
subscriptionId,
minLikelihood,
maxFindings,
infoTypes,
customInfoTypes
) {
[infoTypes, customInfoTypes] = transformCLI(infoTypes, customInfoTypes);
// [START dlp_inspect_gcs]
// Import the Google Cloud client libraries
const DLP = require('@google-cloud/dlp');
const {PubSub} = require('@google-cloud/pubsub');
// Instantiates clients
const dlp = new DLP.DlpServiceClient();
const pubsub = new PubSub();
// The project ID to run the API call under
// const projectId = 'my-project';
// The name of the bucket where the file resides.
// const bucketName = 'YOUR-BUCKET';
// The path to the file within the bucket to inspect.
// Can contain wildcards, e.g. "my-image.*"
// const fileName = 'my-image.png';
// The minimum likelihood required before returning a match
// const minLikelihood = 'LIKELIHOOD_UNSPECIFIED';
// The maximum number of findings to report per request (0 = server maximum)
// const maxFindings = 0;
// The infoTypes of information to match
// const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }];
// The customInfoTypes of information to match
// const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}},
// { infoType: { name: 'REGEX_TYPE' }, regex: {pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}'}}];
// The name of the Pub/Sub topic to notify once the job completes
// TODO(developer): create a Pub/Sub topic to use for this
// const topicId = 'MY-PUBSUB-TOPIC'
// The name of the Pub/Sub subscription to use when listening for job
// completion notifications
// TODO(developer): create a Pub/Sub subscription to use for this
// const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION'
async function inspectGCSFile() {
// Get reference to the file to be inspected
const storageItem = {
cloudStorageOptions: {
fileSet: {url: `gs://${bucketName}/${fileName}`},
},
};
// Construct request for creating an inspect job
const request = {
parent: `projects/${projectId}/locations/global`,
inspectJob: {
inspectConfig: {
infoTypes: infoTypes,
customInfoTypes: customInfoTypes,
minLikelihood: minLikelihood,
limits: {
maxFindingsPerRequest: maxFindings,
},
},
storageConfig: storageItem,
actions: [
{
pubSub: {
topic: `projects/${projectId}/topics/${topicId}`,
},
},
],
},
};
// Create a GCS File inspection job and wait for it to complete
const [topicResponse] = await pubsub.topic(topicId).get();
// Verify the Pub/Sub topic and listen for job notifications via an
// existing subscription.
const subscription = await topicResponse.subscription(subscriptionId);
const [jobsResponse] = await dlp.createDlpJob(request);
// Get the job's ID
const jobName = jobsResponse.name;
// Watch the Pub/Sub topic until the DLP job finishes
await new Promise((resolve, reject) => {
const messageHandler = message => {
if (message.attributes && message.attributes.DlpJobName === jobName) {
message.ack();
subscription.removeListener('message', messageHandler);
subscription.removeListener('error', errorHandler);
resolve(jobName);
} else {
message.nack();
}
};
const errorHandler = err => {
subscription.removeListener('message', messageHandler);
subscription.removeListener('error', errorHandler);
reject(err);
};
subscription.on('message', messageHandler);
subscription.on('error', errorHandler);
});
setTimeout(() => {
console.log('Waiting for DLP job to fully complete');
}, 500);
const [job] = await dlp.getDlpJob({name: jobName});
console.log(`Job ${job.name} status: ${job.state}`);
const infoTypeStats = job.inspectDetails.result.infoTypeStats;
if (infoTypeStats.length > 0) {
infoTypeStats.forEach(infoTypeStat => {
console.log(
` Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.`
);
});
} else {
console.log('No findings.');
}
}
await inspectGCSFile();
// [END dlp_inspect_gcs]
}
// TODO(developer): Please uncomment below line before running sample
// main(...process.argv.slice(2));
process.on('unhandledRejection', err => {
console.error(err.message);
process.exitCode = 1;
});
function transformCLI(infoTypes, customInfoTypes) {
infoTypes = infoTypes
? infoTypes.split(',').map(type => {
return {name: type};
})
: undefined;
if (customInfoTypes) {
customInfoTypes = customInfoTypes.includes(',')
? customInfoTypes.split(',').map((dict, idx) => {
return {
infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())},
dictionary: {wordList: {words: dict.split(',')}},
};
})
: customInfoTypes.split(',').map((rgx, idx) => {
return {
infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())},
regex: {pattern: rgx},
};
});
}
return [infoTypes, customInfoTypes];
}
module.exports = main;