public List processDatasets()

in services/library/src/main/java/com/google/cloud/pso/bq_pii_classifier/functions/dispatcher/Dispatcher.java [221:285]


    /**
     * Builds the messages to dispatch for the tables of every included dataset.
     *
     * <p>Each entry of {@code datasetIncludeList} is handled independently. A dataset is skipped
     * when it appears in {@code datasetExcludeList} or when its location is not one of the
     * configured source data regions (the latter is logged as a warning). Any exception raised
     * while handling a dataset, including the {@link NonRetryableApplicationException} thrown
     * when no DLP inspection template is configured for the dataset's region, is logged through
     * {@code logger.logFailedDispatcherEntityId} and processing continues with the next dataset.
     *
     * @param datasetIncludeList datasets to process; each entry is tokenized on {@code "."} and
     *                           its first two tokens are used as project id and dataset id
     * @param datasetExcludeList datasets to skip; compared to the include entries by exact
     *                           string equality
     * @param tableExcludeList   tables to skip; passed through unchanged to {@code processTables}
     * @return the messages returned by {@code processTables} for all successfully processed
     *         datasets, concatenated in the iteration order of {@code datasetIncludeList}
     */
    public List<JsonMessage> processDatasets(List<String> datasetIncludeList,
                                             List<String> datasetExcludeList,
                                             List<String> tableExcludeList
    ){
        List<JsonMessage> allJsonMessages = new ArrayList<>();

        for (String dataset : datasetIncludeList) {
            try {
                // explicitly excluded datasets are skipped silently
                if (datasetExcludeList.contains(dataset)) {
                    continue;
                }

                List<String> tokens = Utils.tokenize(dataset, ".", true);
                String projectId = tokens.get(0);
                String datasetId = tokens.get(1);

                // Locale.ROOT keeps the lower-casing independent of the JVM default locale
                // (e.g. under a Turkish locale "ASIA" would otherwise become "asıa" and never
                // match a configured region).
                String datasetLocation = bqService.getDatasetLocation(projectId, datasetId)
                        .toLowerCase(java.util.Locale.ROOT);

                if (!config.getSourceDataRegions().contains(datasetLocation)) {
                    logger.logWarnWithTracker(runId,
                            String.format(
                                    "Ignoring dataset %s in region '%s'. Only regions '%s' are configured.",
                                    dataset,
                                    datasetLocation,
                                    config.getSourceDataRegions())
                    );
                    continue;
                }

                // get the inspection template(s) to be used in the region of this dataset
                if (!config.getDlpInspectionTemplatesIdsPerRegion().containsKey(datasetLocation)) {
                    String msg = String.format(
                            "No DLP inspection template(s) found for source data region '%s'",
                            datasetLocation);
                    // caught by the catch block below: the dataset is logged as failed and skipped
                    throw new NonRetryableApplicationException(msg);
                }
                List<String> inspectionTemplatesIds =
                        config.getDlpInspectionTemplatesIdsPerRegion().get(datasetLocation);

                // list the tables under this dataset, as reported by the scanner
                List<String> tablesIncludeList = scanner.listChildren(projectId, datasetId);

                if (tablesIncludeList.isEmpty()) {
                    String msg = String.format(
                            "No Tables found under dataset '%s'",
                            dataset);

                    logger.logWarnWithTracker(runId, msg);
                    continue;
                }

                logger.logInfoWithTracker(runId,
                        String.format("Tables found in dataset %s : %s", dataset, tablesIncludeList));

                // NOTE(review): the "eu" location is handed to processTables as "europe";
                // presumably that is the region name expected downstream - confirm.
                String tablesRegion = datasetLocation.equals("eu") ? "europe" : datasetLocation;

                // accumulate all messages so they can be returned together after the loop
                List<JsonMessage> jsonMessages = processTables(tablesIncludeList,
                        tableExcludeList,
                        inspectionTemplatesIds,
                        tablesRegion);

                allJsonMessages.addAll(jsonMessages);
            } catch (Exception exception) {
                // best effort: log the failed dataset and continue with the remaining ones
                logger.logFailedDispatcherEntityId(runId, dataset, exception);
            }
        }
        return allJsonMessages;
    }