in dlp/src/inspect_gcs_with_sampling.php [51:161]
/**
 * Inspect a Cloud Storage location with Cloud DLP, sampling the content
 * rather than scanning all of it.
 *
 * Creates an inspect job that looks for PHONE_NUMBER, EMAIL_ADDRESS and
 * CREDIT_CARD_NUMBER infoTypes, waits (up to 10 minutes) for the job's
 * Pub/Sub notification and then prints the finding count per infoType,
 * or the job errors if the job failed.
 *
 * @param string $callingProjectId The project ID used to build the job's parent resource name.
 * @param string $gcsUri           The Cloud Storage URL of the file set to inspect.
 * @param string $topicId          The ID of the Pub/Sub topic the job publishes to when it finishes.
 * @param string $subscriptionId   The ID of an existing subscription on that topic to pull from.
 */
function inspect_gcs_with_sampling(
    // TODO(developer): Replace sample parameters before running the code.
    string $callingProjectId,
    string $gcsUri = 'gs://GOOGLE_STORAGE_BUCKET_NAME/dlp_sample.csv',
    string $topicId = 'dlp-pubsub-topic',
    string $subscriptionId = 'dlp_subcription'
): void {
    // Total time to wait for the job, and the cap on a single backoff sleep.
    $timeoutSeconds = 600;
    $maxDelaySeconds = 60;

    // Instantiate the clients.
    $dlp = new DlpServiceClient();
    $pubsub = new PubSubClient();
    $topic = $pubsub->topic($topicId);

    // Construct the items to be inspected, with the sampling limits:
    // a per-file byte limit of 200, a files limit of 90 percent, random start.
    $cloudStorageOptions = (new CloudStorageOptions())
        ->setFileSet((new FileSet())
            ->setUrl($gcsUri))
        ->setBytesLimitPerFile(200)
        ->setFilesLimitPercent(90)
        ->setSampleMethod(SampleMethod::RANDOM_START);

    $storageConfig = (new StorageConfig())
        ->setCloudStorageOptions($cloudStorageOptions);

    // Specify the type of info the inspection will look for.
    $phoneNumberInfoType = (new InfoType())
        ->setName('PHONE_NUMBER');
    $emailAddressInfoType = (new InfoType())
        ->setName('EMAIL_ADDRESS');
    $cardNumberInfoType = (new InfoType())
        ->setName('CREDIT_CARD_NUMBER');
    $infoTypes = [$phoneNumberInfoType, $emailAddressInfoType, $cardNumberInfoType];

    // Specify how the content should be inspected.
    $inspectConfig = (new InspectConfig())
        ->setInfoTypes($infoTypes)
        ->setIncludeQuote(true);

    // Construct the action to run when the job completes.
    $action = (new Action())
        ->setPubSub((new PublishToPubSub())
            ->setTopic($topic->name()));

    // Construct the inspect job config to run.
    $inspectJob = (new InspectJobConfig())
        ->setInspectConfig($inspectConfig)
        ->setStorageConfig($storageConfig)
        ->setActions([$action]);

    // Listen for job notifications via an existing topic/subscription.
    $subscription = $topic->subscription($subscriptionId);

    // Submit request.
    $parent = "projects/$callingProjectId/locations/global";
    $job = $dlp->createDlpJob($parent, [
        'inspectJob' => $inspectJob
    ]);

    // Poll Pub/Sub using exponential backoff until job finishes.
    // Consider using an asynchronous execution model such as Cloud Functions.
    $attempt = 1;
    $startTime = time();
    do {
        foreach ($subscription->pull() as $message) {
            if (
                isset($message->attributes()['DlpJobName']) &&
                $message->attributes()['DlpJobName'] === $job->getName()
            ) {
                $subscription->acknowledge($message);
                // Get the updated job. The DLP API may still report RUNNING
                // right after the notification, so re-fetch until it does not.
                // Pause between fetches and honour the overall timeout so this
                // loop can neither hammer the API nor spin forever.
                do {
                    $job = $dlp->getDlpJob($job->getName());
                    if ($job->getState() !== JobState::RUNNING) {
                        break; // leaves only this inner re-fetch loop.
                    }
                    sleep(1);
                } while (time() - $startTime < $timeoutSeconds);
                break 2; // break from parent do while.
            }
        }
        printf('Waiting for job to complete' . PHP_EOL);
        // Exponential backoff with a capped delay.
        sleep(min($maxDelaySeconds, pow(2, ++$attempt)));
    } while (time() - $startTime < $timeoutSeconds);

    // Print finding counts.
    printf('Job %s status: %s' . PHP_EOL, $job->getName(), JobState::name($job->getState()));
    switch ($job->getState()) {
        case JobState::DONE:
            $infoTypeStats = $job->getInspectDetails()->getResult()->getInfoTypeStats();
            if (count($infoTypeStats) === 0) {
                printf('No findings.' . PHP_EOL);
            } else {
                foreach ($infoTypeStats as $infoTypeStat) {
                    printf(
                        ' Found %s instance(s) of infoType %s' . PHP_EOL,
                        $infoTypeStat->getCount(),
                        $infoTypeStat->getInfoType()->getName()
                    );
                }
            }
            break;
        case JobState::FAILED:
            printf('Job %s had errors:' . PHP_EOL, $job->getName());
            $errors = $job->getErrors();
            foreach ($errors as $error) {
                var_dump($error->getDetails());
            }
            break;
        case JobState::PENDING:
            printf('Job has not completed. Consider a longer timeout or an asynchronous execution model' . PHP_EOL);
            break;
        default:
            // Terminate the line like every other message printed here.
            printf('Unexpected job state. Most likely, the job is either running or has not yet started.' . PHP_EOL);
    }
}