in dlp/src/deidentify_cloud_storage.php [68:181]
function deidentify_cloud_storage(
    // TODO(developer): Replace sample parameters before running the code.
    string $callingProjectId,
    string $inputgcsPath = 'gs://YOUR_GOOGLE_STORAGE_BUCKET',
    string $outgcsPath = 'gs://YOUR_GOOGLE_STORAGE_BUCKET',
    string $deidentifyTemplateName = 'YOUR_DEIDENTIFY_TEMPLATE_NAME',
    string $structuredDeidentifyTemplateName = 'YOUR_STRUCTURED_DEIDENTIFY_TEMPLATE_NAME',
    string $imageRedactTemplateName = 'YOUR_IMAGE_REDACT_DEIDENTIFY_TEMPLATE_NAME',
    string $datasetId = 'YOUR_DATASET_ID',
    string $tableId = 'YOUR_TABLE_ID'
): void {
    // Creates an inspect job over $inputgcsPath with a de-identify action that
    // writes to $outgcsPath, polls the job until it reaches a terminal state
    // (or the attempt budget runs out), then prints a summary of the outcome.

    // Instantiate a client.
    $dlp = new DlpServiceClient();

    $parent = "projects/$callingProjectId/locations/global";

    // Specify the GCS path to be de-identified.
    $cloudStorageOptions = (new CloudStorageOptions())
        ->setFileSet((new FileSet())
            ->setUrl($inputgcsPath));
    $storageConfig = (new StorageConfig())
        ->setCloudStorageOptions(($cloudStorageOptions));

    // Specify the type of info the inspection will look for.
    $inspectConfig = (new InspectConfig())
        ->setInfoTypes([
            (new InfoType())->setName('PERSON_NAME'),
            (new InfoType())->setName('EMAIL_ADDRESS')
        ]);

    // Specify the BigQuery table to store the transformation details.
    $transformationDetailsStorageConfig = (new TransformationDetailsStorageConfig())
        ->setTable((new BigQueryTable())
            ->setProjectId($callingProjectId)
            ->setDatasetId($datasetId)
            ->setTableId($tableId));

    // Specify the de-identify templates used for the transformation. All three
    // template names are resolved against the calling project.
    $transformationConfig = (new TransformationConfig())
        ->setDeidentifyTemplate(
            DlpServiceClient::projectDeidentifyTemplateName($callingProjectId, $deidentifyTemplateName)
        )
        ->setStructuredDeidentifyTemplate(
            DlpServiceClient::projectDeidentifyTemplateName($callingProjectId, $structuredDeidentifyTemplateName)
        )
        ->setImageRedactTemplate(
            DlpServiceClient::projectDeidentifyTemplateName($callingProjectId, $imageRedactTemplateName)
        );

    $deidentify = (new Deidentify())
        ->setCloudStorageOutput($outgcsPath)
        ->setTransformationConfig($transformationConfig)
        ->setTransformationDetailsStorageConfig($transformationDetailsStorageConfig)
        ->setFileTypesToTransform([FileType::TEXT_FILE, FileType::IMAGE, FileType::CSV]);

    $action = (new Action())
        ->setDeidentify($deidentify);

    // Configure the inspection job we want the service to perform.
    $inspectJobConfig = (new InspectJobConfig())
        ->setInspectConfig($inspectConfig)
        ->setStorageConfig($storageConfig)
        ->setActions([$action]);

    // Send the job creation request and process the response.
    $createDlpJobRequest = (new CreateDlpJobRequest())
        ->setParent($parent)
        ->setInspectJob($inspectJobConfig);
    $job = $dlp->createDlpJob($createDlpJobRequest);

    // States after which further polling cannot change the outcome. Stopping on
    // FAILED/CANCELED as well as DONE avoids sleeping through the remaining
    // attempts when the job has already ended unsuccessfully.
    $terminalStates = [JobState::DONE, JobState::FAILED, JobState::CANCELED];

    // Poll up to 10 times, 30 seconds apart.
    $numOfAttempts = 10;
    do {
        printf('Waiting for job to complete' . PHP_EOL);
        sleep(30);
        $getDlpJobRequest = (new GetDlpJobRequest())
            ->setName($job->getName());
        $job = $dlp->getDlpJob($getDlpJobRequest);
        if (in_array($job->getState(), $terminalStates, true)) {
            break;
        }
        $numOfAttempts--;
    } while ($numOfAttempts > 0);

    // Print finding counts.
    printf('Job %s status: %s' . PHP_EOL, $job->getName(), JobState::name($job->getState()));
    switch ($job->getState()) {
        case JobState::DONE:
            $infoTypeStats = $job->getInspectDetails()->getResult()->getInfoTypeStats();
            if (count($infoTypeStats) === 0) {
                printf('No findings.' . PHP_EOL);
            } else {
                foreach ($infoTypeStats as $infoTypeStat) {
                    printf(
                        ' Found %s instance(s) of infoType %s' . PHP_EOL,
                        $infoTypeStat->getCount(),
                        $infoTypeStat->getInfoType()->getName()
                    );
                }
            }
            break;
        case JobState::FAILED:
            printf('Job %s had errors:' . PHP_EOL, $job->getName());
            $errors = $job->getErrors();
            foreach ($errors as $error) {
                var_dump($error->getDetails());
            }
            break;
        case JobState::CANCELED:
            // Reported explicitly so a canceled job is not described as
            // "running or not yet started" by the default branch.
            printf('Job %s was canceled.' . PHP_EOL, $job->getName());
            break;
        case JobState::PENDING:
            printf('Job has not completed. Consider a longer timeout or an asynchronous execution model' . PHP_EOL);
            break;
        default:
            printf('Unexpected job state. Most likely, the job is either running or has not yet started.' . PHP_EOL);
    }
}