in services/library/src/main/java/com/google/cloud/pso/bq_pii_classifier/services/scan/StandardDlpResultsScannerImpl.java [100:139]
/**
 * Returns the latest DLP job name found for each table of the given dataset.
 *
 * <p>Runs a query against the DLP findings table
 * ({@code hostProject.hostDataset.dlpFindingsTable}), keeps only findings whose BigQuery table
 * reference matches {@code project} and {@code dataset}, groups them by table id and selects
 * {@code MAX(job_name)} for every group.
 *
 * @param project project id the findings are filtered on
 * @param dataset dataset id the findings are filtered on
 * @return one job name per row returned by the query, in result order; empty if there are no rows
 * @throws InterruptedException presumably propagated from the BigQuery service calls made here
 * @throws NonRetryableApplicationException if a result row carries a null
 *     {@code latest_job_name} value
 */
public List<String> listChildren(String project, String dataset) throws InterruptedException, NonRetryableApplicationException {
    // DLP job names start with a unix timestamp, so MAX(job_name) yields the latest job per table.
    // NOTE(review): project and dataset are spliced into the SQL text as quoted literals;
    // presumably they originate from trusted configuration - confirm, or move to query parameters.
    String sql = String.format(
            "SELECT DISTINCT\n"
                    + "l.record_location.record_key.big_query_key.table_reference.table_id,\n"
                    + "MAX(job_name) AS latest_job_name\n"
                    + "FROM \n"
                    + "`%s.%s.%s`, UNNEST(location.content_locations) l\n"
                    + "WHERE l.record_location.record_key.big_query_key.table_reference.project_id = '%s'\n"
                    + "AND l.record_location.record_key.big_query_key.table_reference.dataset_id = '%s'\n"
                    + "GROUP BY 1\n"
                    + "ORDER BY 1,2 DESC\n",
            hostProject,
            hostDataset,
            dlpFindingsTable,
            project,
            dataset);

    // Submit the query and block until its results are available.
    TableResult rows = bqService.waitAndGetJobResults(bqService.submitJob(sql));

    // Collect the latest job name of every returned row; a null value aborts the whole listing.
    List<String> latestJobNames = new ArrayList<>();
    for (FieldValueList row : rows.iterateAll()) {
        if (!row.get("latest_job_name").isNull()) {
            latestJobNames.add(row.get("latest_job_name").getStringValue());
            continue;
        }
        throw new NonRetryableApplicationException("processDatasets query returned rows with null 'latest_job_name' field.");
    }
    return latestJobNames;
}