in services/library/src/main/java/com/google/cloud/pso/bq_pii_classifier/functions/dispatcher/Dispatcher.java [221:285]
/**
 * Collects the messages produced by {@code processTables} for every table found under the
 * included datasets.
 *
 * <p>For each entry of {@code datasetIncludeList} that is not in {@code datasetExcludeList}:
 * <ol>
 *   <li>the entry is tokenized on "." and the first two tokens are used as project id and
 *       dataset id;</li>
 *   <li>the dataset location is looked up and lower-cased; datasets whose location is not in
 *       the configured source data regions are skipped with a warning;</li>
 *   <li>the DLP inspection templates configured for that location are resolved; a missing
 *       entry raises a {@code NonRetryableApplicationException} for this dataset;</li>
 *   <li>the dataset's children are listed via {@code scanner} and handed to
 *       {@code processTables}.</li>
 * </ol>
 *
 * <p>Any exception raised while handling a dataset (including the one thrown in step 3) is
 * logged through {@code logger.logFailedDispatcherEntityId} and processing continues with the
 * next dataset; nothing is propagated to the caller.
 *
 * @param datasetIncludeList datasets to process; each entry must tokenize on "." into at least
 *                           a project id and a dataset id
 * @param datasetExcludeList datasets to skip, compared by exact string equality with the
 *                           entries of {@code datasetIncludeList}
 * @param tableExcludeList   passed through unchanged to {@code processTables}
 * @return the concatenation of the messages returned by {@code processTables} for each
 *         processed dataset, in include-list order; empty if nothing was processed
 */
public List<JsonMessage> processDatasets(List<String> datasetIncludeList,
        List<String> datasetExcludeList,
        List<String> tableExcludeList
) {
    List<JsonMessage> allJsonMessages = new ArrayList<>();

    for (String dataset : datasetIncludeList) {
        try {
            if (datasetExcludeList.contains(dataset)) {
                continue;
            }

            List<String> tokens = Utils.tokenize(dataset, ".", true);
            String projectId = tokens.get(0);
            String datasetId = tokens.get(1);

            // Locale.ROOT keeps the lower-casing independent of the JVM default locale, so the
            // result always matches the configured region names (e.g. an uppercase 'I' would
            // otherwise become a dotless 'ı' under a Turkish default locale).
            String datasetLocation = bqService.getDatasetLocation(projectId, datasetId)
                    .toLowerCase(java.util.Locale.ROOT);

            if (!config.getSourceDataRegions().contains(datasetLocation)) {
                logger.logWarnWithTracker(runId,
                        String.format(
                                "Ignoring dataset %s in region '%s'. Only regions '%s' are configured.",
                                dataset,
                                datasetLocation,
                                config.getSourceDataRegions())
                );
                continue;
            }

            // get the inspection template(s) to be used in the region of this dataset
            if (!config.getDlpInspectionTemplatesIdsPerRegion().containsKey(datasetLocation)) {
                String msg = String.format(
                        "No DLP inspection template(s) found for source data region '%s'",
                        datasetLocation);
                // caught below: the dataset is reported as failed and the loop moves on
                throw new NonRetryableApplicationException(msg);
            }
            List<String> inspectionTemplatesIds =
                    config.getDlpInspectionTemplatesIdsPerRegion().get(datasetLocation);

            // list everything the scanner reports under this dataset
            List<String> tablesIncludeList = scanner.listChildren(projectId, datasetId);
            if (tablesIncludeList.isEmpty()) {
                String msg = String.format(
                        "No Tables found under dataset '%s'",
                        dataset);
                logger.logWarnWithTracker(runId, msg);
                continue;
            }

            logger.logInfoWithTracker(runId,
                    String.format("Tables found in dataset %s : %s", dataset, tablesIncludeList));

            // "eu" is handed to processTables as "europe".
            // NOTE(review): presumably the DLP-side name of that region - confirm.
            String tablesRegion = datasetLocation.equals("eu") ? "europe" : datasetLocation;

            // accumulate all messages; they are returned together after the loop
            List<JsonMessage> jsonMessages = processTables(tablesIncludeList,
                    tableExcludeList,
                    inspectionTemplatesIds,
                    tablesRegion);
            allJsonMessages.addAll(jsonMessages);
        } catch (Exception exception) {
            // log and continue with the next dataset
            logger.logFailedDispatcherEntityId(runId, dataset, exception);
        }
    }

    return allJsonMessages;
}