dlp/deidentifyWithDateShift.js (109 lines of code) (raw):
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
'use strict';
// sample-metadata:
// title: Deidentify with Date Shift
// description: Deidentify dates in a CSV file by pseudorandomly shifting them.
// usage: node deidentifyWithDateShift.js my-project dates.csv dates-shifted.csv 30 30 birth_date register_date [<YOUR_ENCRYPTED_AES_256_KEY> projects/my-project/locations/global/keyrings/my-keyring]
function main(
projectId,
inputCsvFile,
outputCsvFile,
dateFields,
lowerBoundDays,
upperBoundDays,
contextFieldId,
wrappedKey,
keyName
) {
dateFields = transformCLI(dateFields);
// [START dlp_deidentify_date_shift]
// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');
// Instantiates a client
const dlp = new DLP.DlpServiceClient();
// Import other required libraries
const fs = require('fs');
// The project ID to run the API call under
// const projectId = 'my-project';
// The path to the CSV file to deidentify
// The first row of the file must specify column names, and all other rows
// must contain valid values
// const inputCsvFile = '/path/to/input/file.csv';
// The path to save the date-shifted CSV file to
// const outputCsvFile = '/path/to/output/file.csv';
// The list of (date) fields in the CSV file to date shift
// const dateFields = [{ name: 'birth_date'}, { name: 'register_date' }];
// The maximum number of days to shift a date backward
// const lowerBoundDays = 1;
// The maximum number of days to shift a date forward
// const upperBoundDays = 1;
// (Optional) The column to determine date shift amount based on
// If this is not specified, a random shift amount will be used for every row
// If this is specified, then 'wrappedKey' and 'keyName' must also be set
// const contextFieldId = [{ name: 'user_id' }];
// (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key
// If this is specified, then 'wrappedKey' and 'contextFieldId' must also be set
// const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME';
// (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates
// This key should be encrypted using the Cloud KMS key specified above
// If this is specified, then 'keyName' and 'contextFieldId' must also be set
// const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY'
// Helper function for converting CSV rows to Protobuf types
const rowToProto = row => {
const values = row.split(',');
const convertedValues = values.map(value => {
if (Date.parse(value)) {
const date = new Date(value);
return {
dateValue: {
year: date.getFullYear(),
month: date.getMonth() + 1,
day: date.getDate(),
},
};
} else {
// Convert all non-date values to strings
return {stringValue: value.toString()};
}
});
return {values: convertedValues};
};
async function deidentifyWithDateShift() {
// Read and parse a CSV file
const csvLines = fs
.readFileSync(inputCsvFile)
.toString()
.split('\n')
.filter(line => line.includes(','));
const csvHeaders = csvLines[0].split(',');
const csvRows = csvLines.slice(1);
// Construct the table object
const tableItem = {
table: {
headers: csvHeaders.map(header => {
return {name: header};
}),
rows: csvRows.map(row => rowToProto(row)),
},
};
// Construct DateShiftConfig
const dateShiftConfig = {
lowerBoundDays: lowerBoundDays,
upperBoundDays: upperBoundDays,
};
if (contextFieldId && keyName && wrappedKey) {
dateShiftConfig.context = {name: contextFieldId};
dateShiftConfig.cryptoKey = {
kmsWrapped: {
wrappedKey: wrappedKey,
cryptoKeyName: keyName,
},
};
} else if (contextFieldId || keyName || wrappedKey) {
throw new Error(
'You must set either ALL or NONE of {contextFieldId, keyName, wrappedKey}!'
);
}
// Construct deidentification request
const request = {
parent: `projects/${projectId}/locations/global`,
deidentifyConfig: {
recordTransformations: {
fieldTransformations: [
{
fields: dateFields,
primitiveTransformation: {
dateShiftConfig: dateShiftConfig,
},
},
],
},
},
item: tableItem,
};
// Run deidentification request
const [response] = await dlp.deidentifyContent(request);
const tableRows = response.item.table.rows;
// Write results to a CSV file
tableRows.forEach((row, rowIndex) => {
const rowValues = row.values.map(
value =>
value.stringValue ||
`${value.dateValue.month}/${value.dateValue.day}/${value.dateValue.year}`
);
csvLines[rowIndex + 1] = rowValues.join(',');
});
csvLines.push('');
fs.writeFileSync(outputCsvFile, csvLines.join('\n'));
// Print status
console.log(`Successfully saved date-shift output to ${outputCsvFile}`);
}
deidentifyWithDateShift();
// [END dlp_deidentify_date_shift]
}
main(...process.argv.slice(2));
process.on('unhandledRejection', err => {
console.error(err.message);
process.exitCode = 1;
});
function transformCLI(dateFields) {
return (dateFields = dateFields.split(',').map(type => {
return {name: type};
}));
}