dlp/inspectWithHotwordRules.js (106 lines of code) (raw):

// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 'use strict'; // sample-metadata: // title: Inspects strings // description: Inspect a string using the Data Loss Prevention API. // usage: node inspectWithHotwordRules.js my-project string minLikelihood maxFindings infoTypes customInfoTypes includeQuote hotwordRegexPattern function main( projectId, string, minLikelihood, maxFindings, infoTypes, customInfoTypes, includeQuote, hotwordRegexPattern ) { [infoTypes, customInfoTypes] = transformCLI(infoTypes, customInfoTypes); // [START dlp_inspect_hotword_rule] // Imports the Google Cloud Data Loss Prevention library const DLP = require('@google-cloud/dlp'); // Instantiates a client const dlp = new DLP.DlpServiceClient(); // The project ID to run the API call under // const projectId = 'my-project'; // The string to inspect // const string = 'Patients MRN 444-5-22222'; // The minimum likelihood required before returning a match // const minLikelihood = DLP.protos.google.privacy.dlp.v2.Likelihood.POSSIBLE; // The maximum number of findings to report per request (0 = server maximum) // const maxFindings = 0; // The infoTypes of information to match // See https://cloud.google.com/dlp/docs/concepts-infotypes for more information // about supported infoTypes. // const infoTypes = [{ name: 'EMAIL_ADDRESS' }]; // The customInfoTypes of information to match // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, // { infoType: { name: 'REGEX_TYPE' }, regex: {pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}'}}]; // Whether to include the matching string // const includeQuote = true; // Custom hotword regex patten // const hotwordRegexPattern = '(?i)(mrn|medical)(?-i)'; async function inspectWithHotwordRule() { // Construct item to inspect const item = { byteItem: { type: DLP.protos.google.privacy.dlp.v2.ByteContentItem.BytesType .TEXT_UTF8, data: Buffer.from(string, 'utf-8'), }, }; // Construct a hot word rule const hotwordRule = { hotwordRegex: { pattern: hotwordRegexPattern, }, proximity: { windowBefore: 10, }, likelihoodAdjustment: { fixedLikelihood: DLP.protos.google.privacy.dlp.v2.Likelihood.VERY_LIKELY, }, }; // Construct a hotword inspection rule const inpectionRuleSet = [ { infoTypes: customInfoTypes.map( customInfoType => customInfoType.infoType ), rules: [{hotwordRule: hotwordRule}], }, ]; // Assigns likelihood to each match customInfoTypes = customInfoTypes.map(customInfoType => { customInfoType.likelihood = DLP.protos.google.privacy.dlp.v2.Likelihood.POSSIBLE; return customInfoType; }); // Construct request const request = { parent: `projects/${projectId}/locations/global`, inspectConfig: { infoTypes: infoTypes, customInfoTypes: customInfoTypes, minLikelihood: minLikelihood, includeQuote: includeQuote, limits: { maxFindingsPerRequest: maxFindings, }, ruleSet: inpectionRuleSet, }, item: item, }; // Run request const [response] = await dlp.inspectContent(request); const findings = response.result.findings; if (findings.length > 0) { console.log('Findings:'); findings.forEach(finding => { if (includeQuote) { console.log(`\tQuote: ${finding.quote}`); } console.log(`\tInfo type: ${finding.infoType.name}`); console.log(`\tLikelihood: ${finding.likelihood}`); }); } else { console.log('No findings.'); } } inspectWithHotwordRule(); // [END dlp_inspect_hotword_rule] } main(...process.argv.slice(2)); process.on('unhandledRejection', err => { console.error(err.message); process.exitCode = 1; }); function transformCLI(infoTypes, customInfoTypes) { infoTypes = infoTypes ? infoTypes.split(',').map(type => { return {name: type}; }) : undefined; if (customInfoTypes) { customInfoTypes = customInfoTypes.includes(',') ? customInfoTypes.split(',').map((dict, idx) => { return { infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())}, dictionary: {wordList: {words: dict.split(',')}}, }; }) : customInfoTypes.split(',').map((rgx, idx) => { return { infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())}, regex: {pattern: rgx}, }; }); } return [infoTypes, customInfoTypes]; }